1
1
name : Event Index
2
2
3
3
on :
4
- push :
5
- branches :
6
- - main
4
+ workflow_dispatch :
5
+
7
6
schedule :
8
7
# <minute [0,59]> <hour [0,23]> <day of the month [1,31]> <month of the year [1,12]> <day of the week [0,6]>
9
8
# https://pubs.opengroup.org/onlinepubs/9699919799/utilities/crontab.html#tag_20_25_07
12
11
# We offset from the hour and half hour to go easy on the servers :)
13
12
- cron : ' 26 3 * * 4'
14
13
14
+ # We doubly fan out
15
+ # We first generate indexes for uni-, bi-, and trigrams with a matrix
16
+ # Each index is split into chunks of 50,000 grams
17
+ # Then we fan out by every chunk and upload
18
+ {% raw %}
15
19
jobs :
16
- index-events :
20
+ # Job id must match the `needs: generate-index-chunks` references in the downstream jobs
+ generate-index-chunks :
17
21
runs-on : ubuntu-latest
18
22
strategy :
19
23
matrix :
20
- # We fan out on n-gram to make it possible to run on GitHub Actions
21
24
n-gram : [1, 2, 3]
25
+ fail-fast : false
26
+
27
+ outputs :
28
+ ngram-1-chunks : ${{ steps.output-index-chunks.outputs.ngram-1-chunks }}
29
+ ngram-2-chunks : ${{ steps.output-index-chunks.outputs.ngram-2-chunks }}
30
+ ngram-3-chunks : ${{ steps.output-index-chunks.outputs.ngram-3-chunks }}
22
31
23
32
steps :
33
+ # Setup Runner
24
34
- uses : actions/checkout@v2
25
35
- uses : actions/setup-python@v1
26
36
with :
27
37
python-version : 3.9
28
38
39
+ # Setup GCloud / Creds
29
40
- name : Setup gcloud
30
41
uses : google-github-actions/setup-gcloud@v0
31
42
with :
32
- project_id : {{ cookiecutter.infrastructure_slug }}
33
- service_account_key : {% raw %} ${{ secrets.GOOGLE_CREDENTIALS }}{% endraw % }
43
+ # Step out of the Jinja raw block just long enough for cookiecutter to render the slug
+ project_id : {% endraw %}{{ cookiecutter.infrastructure_slug }}{% raw %}
44
+ service_account_key : ${{ secrets.GOOGLE_CREDENTIALS }}
34
45
export_default_credentials : true
46
+ - name : Dump Credentials to JSON
47
+ run : |
48
+ echo "$GOOGLE_CREDS" > python/google-creds.json
49
+ env :
50
+ GOOGLE_CREDS : ${{ secrets.GOOGLE_CREDENTIALS }}
35
51
52
+ # Installs
36
53
- name : Install Python Dependencies
37
54
run : |
38
55
cd python/
39
56
pip install .
57
+
58
+ # Index
59
+ - name : Index Events ${{ matrix.n-gram }}-grams
60
+ run : |
61
+ cd python/
62
+ run_cdp_event_index_generation event-index-config.json \
63
+ --n_grams ${{ matrix.n-gram }} \
64
+ --store_remote \
65
+ --parallel
66
+
67
+ # Store generated files to step output
68
+ - name : Store Index Fileset to Outputs
69
+ id : output-index-chunks
70
+ run : |
71
+ cd python/index/
72
+ output=$(python -c 'import os, json; print(json.dumps(os.listdir(".")))')
73
+ # Write the step output via the GITHUB_OUTPUT file; the ::set-output command is deprecated
+ echo "ngram-${{ matrix.n-gram }}-chunks=$output" >> "$GITHUB_OUTPUT"
74
+
75
+ combine-matrix-ngram-chunks :
76
+ needs : generate-index-chunks
77
+ runs-on : ubuntu-latest
78
+ outputs :
79
+ all-chunks : ${{ steps.combine-index-chunks.outputs.combined-chunks }}
80
+
81
+ steps :
82
+ # Setup Runner
83
+ - uses : actions/checkout@v2
84
+ - uses : actions/setup-python@v1
85
+ with :
86
+ python-version : 3.9
87
+
88
+ # Process
89
+ - name : Combine Chunks
90
+ # Step id must match the `steps.combine-index-chunks` reference in this job's outputs
+ id : combine-index-chunks
91
+ run : |
92
+ echo 'print(${{ needs.generate-index-chunks.outputs.ngram-1-chunks }} + ${{ needs.generate-index-chunks.outputs.ngram-2-chunks }} + ${{ needs.generate-index-chunks.outputs.ngram-3-chunks }})' >> print-combined-chunks.py
93
+ output=$(python print-combined-chunks.py)
94
+ # Write the step output via the GITHUB_OUTPUT file; the ::set-output command is deprecated
+ echo "combined-chunks=$output" >> "$GITHUB_OUTPUT"
95
+
96
+ upload-index-chunks :
97
+ needs : combine-matrix-ngram-chunks
98
+ runs-on : ubuntu-latest
99
+ strategy :
100
+ max-parallel : 6
101
+ matrix :
102
+ filename : ${{ fromJson(needs.combine-matrix-ngram-chunks.outputs.all-chunks) }}
103
+ fail-fast : false
104
+
105
+ steps :
106
+ # Setup Runner
107
+ - uses : actions/checkout@v2
108
+ - uses : actions/setup-python@v1
109
+ with :
110
+ python-version : 3.9
111
+
112
+ # Setup GCloud / Creds
113
+ - name : Setup gcloud
114
+ uses : google-github-actions/setup-gcloud@v0
115
+ with :
116
+ # Step out of the Jinja raw block just long enough for cookiecutter to render the slug
+ project_id : {% endraw %}{{ cookiecutter.infrastructure_slug }}{% raw %}
117
+ service_account_key : ${{ secrets.GOOGLE_CREDENTIALS }}
118
+ export_default_credentials : true
40
119
- name : Dump Credentials to JSON
41
120
run : |
42
121
echo "$GOOGLE_CREDS" > python/google-creds.json
43
122
env :
44
- GOOGLE_CREDS : {% raw %}${{ secrets.GOOGLE_CREDENTIALS }}{% endraw %}
45
- - name : Index Events {% raw %}${{ matrix.n-gram }}{% endraw %}-grams
123
+ GOOGLE_CREDS : ${{ secrets.GOOGLE_CREDENTIALS }}
124
+
125
+ # Installs
126
+ - name : Install Python Dependencies
46
127
run : |
47
128
cd python/
48
- {% raw %}run_cdp_event_index event-index-config.json \
49
- --n_grams ${{ matrix.n-gram }} \
50
- --parallel{% endraw %}
129
+ pip install .
130
+
131
+ # Upload Index Chunk
132
+ - name : Process Upload
133
+ run : |
134
+ cd python/
135
+ process_cdp_event_index_chunk event-index-config.json \
136
+ ${{ matrix.filename }} \
137
+ --parallel
138
+ {% endraw %}
0 commit comments