Skip to content

Commit

Permalink
Merge pull request #11 from foldfelis/url4k
Browse files Browse the repository at this point in the history
support url for kaggle
  • Loading branch information
foldfelis authored Sep 1, 2021
2 parents 71e11c0 + f3ee250 commit 152aede
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 3 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ dataset = "ningjingyu/fetchtest"
kdownload(dataset, pwd())
```

A dataset can also be downloaded via its URL:

```julia
using Fetch
url = "https://www.kaggle.com/ningjingyu/fetchtest"
kdownload(url, pwd())
```

## Integrate with DataDeps.jl

According to [DataDeps.jl](https://github.com/oxinabox/DataDeps.jl),
Expand Down
22 changes: 20 additions & 2 deletions src/kaggle.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
export kdownload

# Host name used to recognise Kaggle URLs and to build dataset page URLs.
const KAGGLE_DOMAIN = "www.kaggle.com"
# Base endpoint of the dataset download API; a dataset identifier is appended to it.
const KAGGLE_API = "https://www.kaggle.com/api/v1/datasets/download"

struct Auth
username::String
key::String
Expand All @@ -24,10 +27,25 @@ function gen_auth_key()
end

"""
    gen_kaggle_url(dataset)

Return the download URL for `dataset`: the `KAGGLE_API` endpoint followed by `/` and
`dataset`.
"""
function gen_kaggle_url(dataset)
    # Single source of truth for the endpoint; yields the same string as the former
    # hard-coded literal.
    return "$KAGGLE_API/$dataset"
end

"""
    kaggle_url2dataset(url_or_dataset)

Normalize a Kaggle dataset reference to its `owner/name` identifier.

If `url_or_dataset` contains `KAGGLE_DOMAIN`, the first two path segments after the
domain are taken as the owner and the dataset name; any further path segments, query
string or fragment are ignored. Otherwise the argument is used as the identifier as is.

The identifier is then checked with a `HEAD` request against the dataset page.

# Throws
- `ArgumentError` if no `owner/name` pair can be extracted from a URL, or if the
  dataset page does not answer with status 200.
- Whatever `HTTP.request` itself throws (NOTE(review): presumably connection failures
  and error statuses; confirm against the HTTP.jl version in use).
"""
function kaggle_url2dataset(url_or_dataset)
    if contains(url_or_dataset, KAGGLE_DOMAIN)
        # \Q...\E quotes the domain so that its dots only match literal dots.
        pattern = Regex("\\Q$KAGGLE_DOMAIN\\E/([^/?#]+)/([^/?#]+)")
        m = match(pattern, url_or_dataset)
        if m === nothing
            throw(ArgumentError(string(
                "cannot extract a Kaggle dataset from \"", url_or_dataset,
                "\"; expected a URL of the form https://", KAGGLE_DOMAIN,
                "/<owner>/<dataset>",
            )))
        end
        user_name, dataset_name = m.captures
        dataset = "$user_name/$dataset_name"
    else
        dataset = url_or_dataset
    end

    # Explicit check instead of `@assert`: assertions may be disabled and carry no
    # useful message for the caller.
    status = HTTP.request("HEAD", "https://$KAGGLE_DOMAIN/$dataset").status
    if status != 200
        throw(ArgumentError(
            "Kaggle dataset \"$dataset\" is not reachable (HTTP status $status)"
        ))
    end

    return dataset
end

function kdownload(dataset, localdir)
function kdownload(url_or_dataset, localdir)
dataset = kaggle_url2dataset(url_or_dataset)

url = gen_kaggle_url(dataset)
filepath = joinpath(localdir, "$(replace(dataset, '/'=>'_')).zip")

Expand Down
25 changes: 24 additions & 1 deletion test/kaggle.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@testset "kaggle" begin
@testset "kaggle dataset" begin
dataset = "ningjingyu/fetchtest"

f = kdownload(dataset, pwd())
Expand All @@ -10,3 +10,26 @@

rm(joinpath(pwd(), "FetchTest"), recursive=true, force=true)
end

# Each URL below points at the same dataset (its main page or one of its sub-pages),
# so every one of them must download and unpack to the same content.
@testset "kaggle url" begin
    dataset_pages = [
        "https://www.kaggle.com/ningjingyu/fetchtest",
        "https://www.kaggle.com/ningjingyu/fetchtest/tasks",
        "https://www.kaggle.com/ningjingyu/fetchtest/code",
        "https://www.kaggle.com/ningjingyu/fetchtest/discussion",
        "https://www.kaggle.com/ningjingyu/fetchtest/activity",
        "https://www.kaggle.com/ningjingyu/fetchtest/metadata",
        "https://www.kaggle.com/ningjingyu/fetchtest/settings",
    ]

    for page in dataset_pages
        archive = kdownload(page, pwd())
        DataDeps.unpack(archive)

        # The unpacked dataset holds a single text file whose first line is "Test".
        extracted_file = joinpath(pwd(), "FetchTest", "FetchTest.txt")
        first_line = open(readline, extracted_file, "r")
        @test first_line == "Test"

        # Remove the unpacked directory so the next URL starts from a clean state.
        rm(joinpath(pwd(), "FetchTest"); recursive=true, force=true)
    end
end

0 comments on commit 152aede

Please sign in to comment.