
Commit 8effa32

Add files via upload
1 parent 698eb9c commit 8effa32

File tree

7 files changed: 175 additions, 0 deletions

LICENSE

+1

```
@@ -1,3 +1,4 @@
+
 Apache License
 Version 2.0, January 2004
 http://www.apache.org/licenses/
```

README.md

+53

@@ -0,0 +1,53 @@
# RVS: A Novel Dataset for Spatial Compositional Reasoning

## Data

The data can be found here: XXX

The data contains four JSON files corresponding to four splits: train (Manhattan), seen-city development (Manhattan), unseen-city development (Pittsburgh), and test (Philadelphia).

Each sample contains the following fields (a loading sketch is shown below the list):

* content - the navigation instruction.
* rvs_start_point - the coordinates of the start location.
* rvs_goal_point - the coordinates of the goal location.
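A minimal sketch of reading one split in Python. The file name (`train.json`) and the JSON-lines layout are assumptions for illustration; adjust to the actual file names and structure of the released data.

```python
import json

# Hypothetical file name and JSON-lines layout; the released splits may use
# different names or a single JSON array instead.
with open("train.json") as f:
    for line in f:
        sample = json.loads(line)
        instruction = sample["content"]    # navigation instruction (text)
        start = sample["rvs_start_point"]  # coordinates of the start location
        goal = sample["rvs_goal_point"]    # coordinates of the goal location
        print(instruction, start, goal)
        break  # show the first sample only
```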
## Run model

```
bazel-bin/rvs/model/text/model_trainer --processed_data_dir OUTDIR --train_region Manhattan --dev_region Manhattan_dev --test_region Manhattan_dev --output_dir OUTDIR
```
## Installation

### Install conda environment

```
conda create --name rvs -y
conda activate rvs
python3 -m pip install -r requirements.txt
```

### Install Bazel

```
apt install bazel
```

### Bazel: Build

```
source build_all.sh
```

### Bazel: Test

```
source test_all.sh
```

### Bazel: Run

```
source run_all.sh
```

bazelisk

4.97 MB
Binary file not shown.

build_all.sh

+5

```
@@ -0,0 +1,5 @@
# Check the types in everything.
pytype rvs

# Build everything.
bazelisk query rvs/... | xargs bazelisk build
```
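When iterating on one component, a single target can be built or tested instead of the whole tree. The target labels below are illustrative guesses based on the binary paths used elsewhere in this commit, not labels confirmed by the repo's BUILD files.

```
# Hypothetical target labels; check the BUILD files for the exact names.
bazelisk build //rvs/model/text:model_trainer
bazelisk test //rvs/geo/...
```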

requirements.txt

+33

```
@@ -0,0 +1,33 @@
absl-py==1.4.0
attrs==22.2.0
flashtext==2.7
folium==0.14.0
geographiclib==2.0
geopy==2.3.0
inflect==6.0.2
nltk==3.8.1
s2geometry==0.9.0
scikit-learn==1.2.0
scipy==1.10.0
Shapely==1.8.5.post1
SPARQLWrapper==2.0.0
swifter==1.3.4
tokenizers==0.13.2
transformers==4.25.1
vector-quantize-pytorch==1.0.4
Fiona==1.8.22
numpy==1.24.1
pandas==1.5.2
scikit-learn==1.2.0
geopandas==0.12.2
geographiclib==2.0
gensim==4.3.0
networkx==2.8.8
node2vec==0.4.6
osmnx==0.16.2
mapply==0.1.18
fuzzywuzzy==0.18.0
sentencepiece==0.1.99
torch==2.0.1
torchvision==0.15.2
torchaudio==2.0.2
```
run_all.sh

+82

```
@@ -0,0 +1,82 @@
#!/bin/bash

REGION_NAME="UTAustin"

OUTPUT_DIR=$HOME/tmp/rvs_run/$REGION_NAME
MAP_DIR=$OUTPUT_DIR/map

OUTPUT_DIR_MODEL=$OUTPUT_DIR/rvs_run/manhattan
OUTPUT_DIR_MODEL_RVS=$OUTPUT_DIR_MODEL/rvs
OUTPUT_DIR_MODEL_RVS_FIXED_4=$OUTPUT_DIR_MODEL/rvs/fixed_4
OUTPUT_DIR_MODEL_RVS_FIXED_5=$OUTPUT_DIR_MODEL/rvs/fixed_5

OUTPUT_DIR_MODEL_HUMAN=$OUTPUT_DIR_MODEL/human


echo "****************************************"
echo "* Geo *"
echo "****************************************"

rm -rf $OUTPUT_DIR
mkdir -p $OUTPUT_DIR
mkdir -p $MAP_DIR

bazel-bin/rvs/geo/map_processing/map_processor --region $REGION_NAME --min_s2_level 18 --directory $MAP_DIR
bazel-bin/rvs/geo/sample_poi --region $REGION_NAME --min_s2_level 18 --directory $MAP_DIR --path $MAP_DIR/utaustin_geo_paths.gpkg --n_samples 8

echo "****************************************"
echo "* graph embeddings *"
echo "****************************************"

GRAPH_EMBEDDING_PATH=$MAP_DIR/graph_embedding.pth
bazel-bin/rvs/data/metagraph/create_graph_embedding --region $REGION_NAME --dimensions 224 --s2_level 15 --s2_node_levels 15 --base_osm_map_filepath $MAP_DIR --save_embedding_path $GRAPH_EMBEDDING_PATH --num_walks 2 --walk_length 2

echo "****************************************"
echo "* models *"
echo "****************************************"
mkdir -p $OUTPUT_DIR_MODEL
mkdir -p $OUTPUT_DIR_MODEL_RVS
mkdir -p $OUTPUT_DIR_MODEL_RVS_FIXED_4
mkdir -p $OUTPUT_DIR_MODEL_RVS_FIXED_5
mkdir -p $OUTPUT_DIR_MODEL_HUMAN

echo "* Dual-Encoder-Bert - HUMAN DATA *"
bazel-bin/rvs/model/text/model_trainer --raw_data_dir ~/RVS/dataset --processed_data_dir $OUTPUT_DIR_MODEL_HUMAN --train_region Manhattan --dev_region Manhattan --test_region Manhattan --s2_level 15 --output_dir $OUTPUT_DIR_MODEL_HUMAN --num_epochs 1 --task RVS --model Dual-Encoder-Bert

echo "* Classification-Bert - HUMAN DATA *"
bazel-bin/rvs/model/text/model_trainer --raw_data_dir ~/RVS/dataset --processed_data_dir $OUTPUT_DIR_MODEL_HUMAN --train_region Manhattan --dev_region Manhattan --test_region Manhattan --s2_level 15 --output_dir $OUTPUT_DIR_MODEL_HUMAN --num_epochs 1 --task RVS --model Classification-Bert

echo "* S2-Generation-T5 - HUMAN DATA *"
bazel-bin/rvs/model/text/model_trainer --raw_data_dir ~/RVS/dataset --processed_data_dir $OUTPUT_DIR_MODEL_HUMAN --train_region Manhattan --dev_region Manhattan --test_region Manhattan --s2_level 15 --output_dir $OUTPUT_DIR_MODEL_HUMAN --num_epochs 1 --task RVS --model S2-Generation-T5 --train_batch_size 20 --test_batch_size 40

echo "* S2-Generation-T5-start-text-input - HUMAN DATA *"
bazel-bin/rvs/model/text/model_trainer --raw_data_dir ~/RVS/dataset --processed_data_dir $OUTPUT_DIR_MODEL_HUMAN --train_region Manhattan --dev_region Manhattan --test_region Manhattan --s2_level 15 --output_dir $OUTPUT_DIR_MODEL_HUMAN --num_epochs 1 --task RVS --model S2-Generation-T5-start-text-input --train_batch_size 20 --test_batch_size 40

echo "* S2-Generation-T5-Landmarks - HUMAN DATA *"
bazel-bin/rvs/model/text/model_trainer --raw_data_dir ~/RVS/dataset --processed_data_dir $OUTPUT_DIR_MODEL_HUMAN --train_region Manhattan --dev_region Manhattan --test_region Manhattan --s2_level 15 --output_dir $OUTPUT_DIR_MODEL_HUMAN --num_epochs 1 --task RVS --model S2-Generation-T5-Landmarks --train_batch_size 20 --test_batch_size 40

echo "* Baseline *"
bazel-bin/rvs/model/baselines --raw_data_dir ~/RVS/dataset --metrics_dir $OUTPUT_DIR_MODEL_HUMAN --task RVS --region Philadelphia


echo "****************************************"
echo "* Wikidata *"
echo "****************************************"
bazel-bin/rvs/data/wikidata/extract_geofenced_wikidata_items --region $REGION_NAME

echo "****************************************"
echo "* Wikipedia *"
echo "****************************************"
bazel-bin/rvs/data/wikipedia/extract_wikipedia_items --titles=New_York_Stock_Exchange,Empire_State_Building

echo "****************************************"
echo "* Wikigeo *"
echo "****************************************"
bazel-bin/rvs/data/create_wikigeo_dataset --region $REGION_NAME --output_dir $OUTPUT_DIR/wikigeo
bazel-bin/rvs/data/create_wikigeo_dataset --region $REGION_NAME --output_dir $OUTPUT_DIR/wikigeo --osm_path $MAP_DIR/utaustin_poi.pkl


echo "Delete DATA"
rm -rf $OUTPUT_DIR
```

test_all.sh

+1

```
@@ -0,0 +1 @@
bazelisk query rvs/... | xargs bazelisk test
```
