-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ca24e9d
commit 058ee0d
Showing 7 changed files with 236 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
cmd/mocword_download/mocword_download |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# build compiles the mocword_download command by running `go build` from | ||
# inside the command's own directory. | ||
.PHONY: build | ||
build: | ||
cd cmd/mocword_download && go build | ||
|
||
|
||
# download runs the mocword_download binary. It depends on build because the
# binary is a build artifact: without the prerequisite, `make download` fails
# on a checkout where `make build` has not been run yet.
.PHONY: download
download: build
	cmd/mocword_download/mocword_download
|
||
|
||
# check rewrites the imports of every Go source file with goimports (keeping
# this module's own packages in a separate import group) and then runs
# staticcheck over all packages.
#
# Files are selected with `-type f -name '*.go'` so that only regular files
# whose name ends in ".go" are passed on; NUL-separated output keeps paths
# containing whitespace intact.
.PHONY: check
check:
	find . -type f -name '*.go' -print0 | xargs -0 goimports -w -local "github.com/high-moctane/mocword"
	staticcheck ./...
|
||
|
||
# test runs `go test` for every package below the current directory. | ||
.PHONY: test | ||
test: | ||
go test ./... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"log" | ||
|
||
"github.com/high-moctane/mocword" | ||
) | ||
|
||
func main() { | ||
if err := mocword.RunDownload(context.Background()); err != nil { | ||
log.Fatal(err) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
package mocword | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"gorm.io/driver/sqlite" | ||
"gorm.io/gorm" | ||
"log" | ||
"os" | ||
"sync" | ||
) | ||
|
||
func RunDownload(ctx context.Context) error { | ||
conn, err := NewConn() | ||
if err != nil { | ||
return fmt.Errorf("failed to open conn: %w", err) | ||
} | ||
|
||
if err := Migrate(ctx, conn); err != nil { | ||
return fmt.Errorf("failed to migrate: %w", err) | ||
} | ||
|
||
if err := DownloadAndSave(ctx, conn, 1); err != nil { | ||
return fmt.Errorf("failed to download and save 1-grams: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func NewConn() (*gorm.DB, error) { | ||
conn, err := gorm.Open(sqlite.Open("file:data.sqlite?cache=shared"), &gorm.Config{}) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to open conn: %w", err) | ||
} | ||
db, err := conn.DB() | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to open db: %w", err) | ||
} | ||
db.SetMaxOpenConns(1) | ||
|
||
return conn, nil | ||
} | ||
|
||
func Migrate(ctx context.Context, conn *gorm.DB) error { | ||
conn.WithContext(ctx).AutoMigrate(&FetchedFile{}) | ||
conn.WithContext(ctx).AutoMigrate(&OneGramRecord{}) | ||
conn.WithContext(ctx).AutoMigrate(&TwoGramRecord{}) | ||
conn.WithContext(ctx).AutoMigrate(&ThreeGramRecord{}) | ||
conn.WithContext(ctx).AutoMigrate(&FourGramRecord{}) | ||
conn.WithContext(ctx).AutoMigrate(&FiveGramRecord{}) | ||
return nil | ||
} | ||
|
||
func DownloadAndSave(ctx context.Context, conn *gorm.DB, n int) error { | ||
queryCtx, queryCancel := context.WithCancel(ctx) | ||
queryCh := make(chan FetchedFile) | ||
var queryWg sync.WaitGroup | ||
|
||
// Query | ||
for idx := 0; idx < TotalFileNum(n); idx++ { | ||
queryWg.Add(1) | ||
go func(queryCtx context.Context, idx int) { | ||
defer queryWg.Done() | ||
|
||
queryCh <- FetchedFile{n, idx} | ||
}(queryCtx, idx) | ||
} | ||
|
||
// Download | ||
dlCtx, dlCancel := context.WithCancel(ctx) | ||
dlCh := make(chan os.File) | ||
var dlWg sync.WaitGroup | ||
dlWg.Add(1) | ||
go func(queryCtx, dlCtx context.Context) { | ||
defer dlWg.Done() | ||
|
||
for { | ||
select { | ||
case <-ctx.Done(): | ||
return | ||
default: | ||
} | ||
|
||
var query FetchedFile | ||
|
||
select { | ||
case q := <-queryCh: | ||
query = q | ||
case <-queryCtx.Done(): | ||
q, ok := <-queryCh | ||
if !ok { | ||
return | ||
} | ||
query = q | ||
case <-dlCtx.Done(): | ||
return | ||
} | ||
|
||
dlWg.Add(1) | ||
go func() { | ||
defer dlWg.Done() | ||
|
||
client := http.DefaultClient | ||
}() | ||
} | ||
}(queryCtx, dlCtx) | ||
|
||
queryWg.Wait() | ||
queryCancel() | ||
|
||
return nil | ||
} | ||
|
||
// TotalFileNum returns the number of files that make up the n-gram data set
// for the given n (1 through 5).
//
// Any other n is treated as a programming error: the function logs and
// panics with the message "invalid n: <n>".
func TotalFileNum(n int) int {
	switch n {
	case 1:
		return 24
	case 2:
		return 589
	case 3:
		return 6881
	case 4:
		return 6668
	case 5:
		return 19423
	}

	// Panicf (not Panic) is required for the %v verb to be expanded.
	log.Panicf("invalid n: %v", n)
	return 0 // unreachable; the compiler does not know log.Panicf never returns
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package mocword | ||
|
||
// FetchedFile identifies one source file of the n-gram data set by the
// n-gram order N and the file index Idx within that order. Both fields
// together form the composite primary key.
type FetchedFile struct {
	N, Idx int `gorm:"primaryKey"`
}
|
||
// OneGramRecord is the database model for a single word and its score. | ||
// NOTE(review): ID presumably acts as GORM's conventional primary key and is | ||
// the value stored in the WordN columns of the other record types - confirm. | ||
type OneGramRecord struct { | ||
ID int64 | ||
Word string | ||
Score int64 | ||
} |
|
||
// TwoGramRecord is the database model for a 2-gram and its score. The two
// word columns together form the composite primary key.
// NOTE(review): the word columns presumably hold OneGramRecord IDs - confirm.
type TwoGramRecord struct {
	Word1, Word2 int64 `gorm:"primaryKey"`
	Score        int64
}
|
||
// ThreeGramRecord is the database model for a 3-gram and its score. The
// three word columns together form the composite primary key.
// NOTE(review): the word columns presumably hold OneGramRecord IDs - confirm.
type ThreeGramRecord struct {
	Word1, Word2, Word3 int64 `gorm:"primaryKey"`
	Score               int64
}
|
||
// FourGramRecord is the database model for a 4-gram and its score. The four
// word columns together form the composite primary key.
// NOTE(review): the word columns presumably hold OneGramRecord IDs - confirm.
type FourGramRecord struct {
	Word1, Word2, Word3, Word4 int64 `gorm:"primaryKey"`
	Score                      int64
}
|
||
// FiveGramRecord is the database model for a 5-gram and its score. The five
// word columns together form the composite primary key.
// NOTE(review): the word columns presumably hold OneGramRecord IDs - confirm.
type FiveGramRecord struct {
	Word1, Word2, Word3, Word4, Word5 int64 `gorm:"primaryKey"`
	Score                             int64
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
module github.com/high-moctane/mocword | ||
|
||
go 1.17 | ||
|
||
// Direct dependencies: both packages are imported by this module's own code.
require (
	gorm.io/driver/sqlite v1.2.6
	gorm.io/gorm v1.22.4
)

// Indirect dependencies, pulled in by the packages above.
require (
	github.com/jinzhu/inflection v1.0.0 // indirect
	github.com/jinzhu/now v1.1.4 // indirect
	github.com/mattn/go-sqlite3 v1.14.9 // indirect
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= | ||
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= | ||
github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= | ||
github.com/jinzhu/now v1.1.3/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= | ||
github.com/jinzhu/now v1.1.4 h1:tHnRBy1i5F2Dh8BAFxqFzxKqqvezXrL2OW1TnX+Mlas= | ||
github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= | ||
github.com/mattn/go-sqlite3 v1.14.9 h1:10HX2Td0ocZpYEjhilsuo6WWtUqttj2Kb0KtD86/KYA= | ||
github.com/mattn/go-sqlite3 v1.14.9/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= | ||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | ||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= | ||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | ||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= | ||
gorm.io/driver/sqlite v1.2.6 h1:SStaH/b+280M7C8vXeZLz/zo9cLQmIGwwj3cSj7p6l4= | ||
gorm.io/driver/sqlite v1.2.6/go.mod h1:gyoX0vHiiwi0g49tv+x2E7l8ksauLK0U/gShcdUsjWY= | ||
gorm.io/gorm v1.22.3/go.mod h1:F+OptMscr0P2F2qU97WT1WimdH9GaQPoDW7AYd5i2Y0= | ||
gorm.io/gorm v1.22.4 h1:8aPcyEJhY0MAt8aY6Dc524Pn+pO29K+ydu+e/cXSpQM= | ||
gorm.io/gorm v1.22.4/go.mod h1:1aeVC+pe9ZmvKZban/gW4QPra7PRoTEssyc922qCAkk= |