Skip to content

Commit c83dcc1

Browse files
add finetune code for VCR
1 parent 3b7e306 commit c83dcc1

13 files changed

+1700
-0
lines changed

downstream/vcr/data/colormap.py

+84
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,84 @@
1+
# Copyright (c) 2017-present, Facebook, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
##############################################################################
15+
16+
"""An awesome colormap for really neat visualizations."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
from __future__ import unicode_literals
22+
23+
import numpy as np
24+
25+
# Palette of 56 RGB triples (one row per color, channel values in 0-255).
# Consumers index rows directly, so the row order is part of the contract:
# do not sort, deduplicate, or insert entries in the middle.
color_list = np.array(
    [
        [255, 0, 0],
        [0, 255, 0],
        [236, 176, 31],
        [0, 0, 255],
        [255, 0, 255],
        [170, 0, 255],
        [255, 255, 0],
        [170, 84, 0],
        [84, 84, 0],
        [255, 127, 0],
        [76, 189, 237],
        [170, 0, 127],
        [125, 46, 141],
        [190, 190, 0],
        [161, 19, 46],
        [0, 170, 127],
        [255, 170, 127],
        [0, 84, 127],
        [255, 84, 127],
        [170, 170, 255],
        [170, 170, 127],
        [84, 0, 0],
        [0, 170, 0],
        [0, 255, 255],
        [255, 170, 255],
        [84, 0, 127],
        [255, 255, 127],
        [170, 0, 0],
        [84, 255, 127],
        [0, 0, 127],
        [170, 84, 127],
        [170, 84, 255],
        [170, 170, 0],
        [216, 82, 24],
        [0, 84, 0],
        [84, 0, 255],
        [255, 0, 127],
        [127, 0, 0],
        [170, 255, 127],
        [170, 255, 255],
        [0, 127, 0],
        [0, 0, 170],
        [84, 170, 127],
        [0, 113, 188],
        [118, 171, 47],
        [84, 84, 127],
        [0, 42, 0],
        [84, 84, 255],
        [84, 170, 0],
        [84, 170, 255],
        [170, 255, 0],
        [0, 0, 212],
        [0, 212, 0],
        [0, 0, 84],
        [0, 84, 255],
        # Final row is a neutral gray.
        [145, 145, 145],
    ]
)

downstream/vcr/data/draw_bbox.py

+112
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,112 @@
1+
"""Draw the bounding boxes referenced by each VCR example onto its image.

For every annotation in the selected split (restricted to the current fold),
the object indices mentioned in the question, the answer choices and, in
``rationale`` mode, the rationale choices are collected.  Each referenced box
is outlined and filled with a translucent color, and the result is written to
``bbox/<split>/<mode>/<annot_id>.jpg``.
"""
import gc
import json
import os
import argparse
from tqdm import tqdm
from downstream.vcr.data.colormap import color_list
from PIL import Image
import PIL.ImageDraw as ImageDraw

# Fill strength of the highlighted region, as a fraction and as an 8-bit alpha.
TRANSPARENCY = .15
OPACITY = int(255 * TRANSPARENCY)


def _collect_refs(tokens, object_names, mentions, objects):
    """Sort the object indices referenced by one tokenized sentence.

    Tokens that are lists hold indices into ``object_names``.  Indices whose
    name is ``"person"`` are added to the ``mentions`` set, every other index
    to the ``objects`` set.  Plain (non-list) tokens are ignored.
    """
    for token in tokens:
        if not isinstance(token, list):
            continue
        for idx in token:
            if object_names[idx] == "person":
                mentions.add(idx)
            else:
                objects.add(idx)


parser = argparse.ArgumentParser(description='SCRAPE!')
parser.add_argument(
    '-fold',
    dest='fold',
    default=0,
    type=int,
    help='which fold we are on'
)
parser.add_argument(
    '-num_folds',
    dest='num_folds',
    default=1,
    type=int,
    help='Number of folds (corresponding to both the number of training files and the number of testing files)',
)
parser.add_argument(
    '-split',
    dest='split',
    default='train',
    type=str,
)
parser.add_argument(
    '-mode',
    dest='mode',
    default='answer',
    type=str,
)
args = parser.parse_args()


split = args.split
mode = args.mode
save_dir = f'bbox/{split}/{mode}'
# The companion shell script pre-creates this directory; creating it here as
# well lets the script be run on its own.  exist_ok keeps parallel folds safe.
os.makedirs(save_dir, exist_ok=True)

# Root of the VCR dataset; must be set before running.
VCR_DIRECTORY = ''
with open(f'{VCR_DIRECTORY}/annotation/{split}.jsonl', 'r') as annotation_file:
    items = [json.loads(line) for line in annotation_file]
img_dir = f'{VCR_DIRECTORY}/vcr1images'

# Every palette row except the last is used for persons; the last row is
# reserved for all non-person objects.
num_person_colors = len(color_list) - 1

counter = 0
for item_idx, item in enumerate(tqdm(items)):
    # Each process handles only the items belonging to its own fold.
    if item_idx % args.num_folds != args.fold:
        continue
    counter += 1

    mentions = set()
    objects = set()

    sentences = [item["question"]] + list(item["answer_choices"])
    if mode == 'rationale':
        sentences += list(item["rationale_choices"])
    for sentence in sentences:
        _collect_refs(sentence, item["objects"], mentions, objects)

    # convert() returns a new in-memory image, so the source file can be
    # closed right away.
    with Image.open(f'{img_dir}/{item["img_fn"]}') as source_image:
        image = source_image.convert("RGBA")
    with open(f'{img_dir}/{item["metadata_fn"]}', 'r') as metadata_file:
        meta = json.load(metadata_file)
    boxes = meta['boxes']

    for box_idx, box in enumerate(boxes):
        if box_idx in mentions:
            color = color_list[box_idx % num_person_colors]
        elif box_idx in objects:
            color = color_list[-1]
        else:
            continue
        # Plain Python ints, so the color tuples do not carry numpy scalars.
        rgb = tuple(int(channel) for channel in color)

        # Only the first four values of a box are coordinates.
        x1, y1, x2, y2 = [int(value) for value in box[:4]]
        shape = [(x1, y1), (x2, y1), (x2, y2), (x1, y2), (x1, y1)]

        # Translucent fill goes on a separate, fully transparent layer ...
        overlay = Image.new('RGBA', image.size, rgb + (0,))
        draw = ImageDraw.Draw(overlay)
        draw.polygon(shape, fill=rgb + (OPACITY,))

        # ... while the opaque outline is drawn directly on the image.
        draw = ImageDraw.Draw(image)
        draw.line(shape, fill=rgb, width=7)

        image = Image.alpha_composite(image, overlay)

    image = image.convert("RGB")
    image.save(f'{save_dir}/{item["annot_id"]}.jpg')

    gc.collect()

print(f'writing {counter} examples')
112+

downstream/vcr/data/draw_bbox.sh

+24
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env bash
# Render bounding-box overlays for the VCR train/val splits in both the
# answer and rationale modes, fanning the folds out with GNU parallel.

export NUM_FOLDS=64

# Output images go to bbox/<split>/<mode>/, per-fold logs to bbox_logs/<mode>/.
mkdir -p bbox/{train,val}/{answer,rationale}
mkdir -p bbox_logs/{answer,rationale}

# One parallel run per (split, mode) pair, in the order:
# train/answer, train/rationale, val/answer, val/rationale.
# {1} is substituted by parallel with the fold index.
for split in train val; do
  for mode in answer rationale; do
    parallel -j "$(nproc --all)" --will-cite \
      "python draw_bbox.py -fold {1} -num_folds ${NUM_FOLDS} -split ${split} -mode ${mode} > bbox_logs/${mode}/${split}log{1}.txt" \
      ::: $(seq 0 $((NUM_FOLDS - 1)))
  done
done

downstream/vcr/data/draw_segms.py

+115
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,115 @@
1+
"""Draw the segmentation masks referenced by each VCR example onto its image.

For every annotation in the selected split (restricted to the current fold),
the object indices mentioned in the question, the answer choices and, in
``rationale`` mode, the rationale choices are collected.  Each referenced
segmentation is outlined and filled with a translucent color, and the result
is written to ``segm/<split>/<mode>/<annot_id>.jpg``.
"""
import gc
import json
import os
import argparse
from tqdm import tqdm
from downstream.vcr.data.colormap import color_list
from PIL import Image
import PIL.ImageDraw as ImageDraw

# Fill strength of the highlighted region, as a fraction and as an 8-bit alpha.
TRANSPARENCY = .15
OPACITY = int(255 * TRANSPARENCY)


def _collect_refs(tokens, object_names, mentions, objects):
    """Sort the object indices referenced by one tokenized sentence.

    Tokens that are lists hold indices into ``object_names``.  Indices whose
    name is ``"person"`` are added to the ``mentions`` set, every other index
    to the ``objects`` set.  Plain (non-list) tokens are ignored.
    """
    for token in tokens:
        if not isinstance(token, list):
            continue
        for idx in token:
            if object_names[idx] == "person":
                mentions.add(idx)
            else:
                objects.add(idx)


def _polygon_parts(segm):
    """Return the drawable parts of one segmentation as lists of point tuples.

    Empty parts are dropped because there is nothing to draw.  A part with a
    single point gets that point duplicated so it still forms a valid
    polygon.  The input is left unmodified.
    """
    parts = []
    for part in segm:
        points = [tuple(point) for point in part]
        if not points:
            continue
        if len(points) < 2:
            points.append(points[0])
        parts.append(points)
    return parts


parser = argparse.ArgumentParser(description='SCRAPE!')
parser.add_argument(
    '-fold',
    dest='fold',
    default=0,
    type=int,
    help='which fold we are on'
)
parser.add_argument(
    '-num_folds',
    dest='num_folds',
    default=1,
    type=int,
    help='Number of folds (corresponding to both the number of training files and the number of testing files)',
)
parser.add_argument(
    '-split',
    dest='split',
    default='train',
    type=str,
)
parser.add_argument(
    '-mode',
    dest='mode',
    default='answer',
    type=str,
)
args = parser.parse_args()


split = args.split
mode = args.mode
save_dir = f'segm/{split}/{mode}'
# The companion shell script pre-creates this directory; creating it here as
# well lets the script be run on its own.  exist_ok keeps parallel folds safe.
os.makedirs(save_dir, exist_ok=True)

# Root of the VCR dataset; must be set before running.
VCR_DIRECTORY = ''
with open(f'{VCR_DIRECTORY}/annotation/{split}.jsonl', 'r') as annotation_file:
    items = [json.loads(line) for line in annotation_file]
img_dir = f'{VCR_DIRECTORY}/vcr1images'

# Every palette row except the last is used for persons; the last row is
# reserved for all non-person objects.
num_person_colors = len(color_list) - 1

counter = 0
for item_idx, item in enumerate(tqdm(items)):
    # Each process handles only the items belonging to its own fold.
    if item_idx % args.num_folds != args.fold:
        continue
    counter += 1

    mentions = set()
    objects = set()

    sentences = [item["question"]] + list(item["answer_choices"])
    if mode == 'rationale':
        sentences += list(item["rationale_choices"])
    for sentence in sentences:
        _collect_refs(sentence, item["objects"], mentions, objects)

    # convert() returns a new in-memory image, so the source file can be
    # closed right away.
    with Image.open(f'{img_dir}/{item["img_fn"]}') as source_image:
        image = source_image.convert("RGBA")
    with open(f'{img_dir}/{item["metadata_fn"]}', 'r') as metadata_file:
        meta = json.load(metadata_file)
    segms = meta['segms']

    for segm_idx, segm in enumerate(segms):
        if segm_idx in mentions:
            color = color_list[segm_idx % num_person_colors]
        elif segm_idx in objects:
            color = color_list[-1]
        else:
            continue
        # Plain Python ints, so the color tuples do not carry numpy scalars.
        rgb = tuple(int(channel) for channel in color)

        parts = _polygon_parts(segm)

        # Translucent fill goes on a separate, fully transparent layer ...
        overlay = Image.new('RGBA', image.size, rgb + (0,))
        draw = ImageDraw.Draw(overlay)
        for points in parts:
            draw.polygon(points, fill=rgb + (OPACITY,))

        # ... while the opaque outline is drawn directly on the image.
        draw = ImageDraw.Draw(image)
        for points in parts:
            # Repeat the first point so the outline is closed.
            draw.line(points + [points[0]], fill=rgb, width=7)
        image = Image.alpha_composite(image, overlay)

    image = image.convert("RGB")
    image.save(f'{save_dir}/{item["annot_id"]}.jpg')

    gc.collect()

print(f'writing {counter} examples')
115+

downstream/vcr/data/draw_segms.sh

+24
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env bash
# Render segmentation overlays for the VCR train/val splits in both the
# answer and rationale modes, fanning the folds out with GNU parallel.

export NUM_FOLDS=64

# Output images go to segm/<split>/<mode>/, per-fold logs to segm_logs/<mode>/.
mkdir -p segm/{train,val}/{answer,rationale}
mkdir -p segm_logs/{answer,rationale}

# One parallel run per (split, mode) pair, in the order:
# train/answer, train/rationale, val/answer, val/rationale.
# {1} is substituted by parallel with the fold index.
for split in train val; do
  for mode in answer rationale; do
    parallel -j "$(nproc --all)" --will-cite \
      "python draw_segms.py -fold {1} -num_folds ${NUM_FOLDS} -split ${split} -mode ${mode} > segm_logs/${mode}/${split}log{1}.txt" \
      ::: $(seq 0 $((NUM_FOLDS - 1)))
  done
done

0 commit comments

Comments
 (0)