Skip to content

Commit

Permalink
Added Scrape service interface
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandr Filioglo authored Oct 20, 2020
1 parent 5044043 commit afaa757
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 3 deletions.
105 changes: 105 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Created by .ignore support plugin (hsz.mobi)
### VisualStudioCode template
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace

# Local History for Visual Studio Code
.history/

### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
.idea


# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### Go template
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Dependency directories (remove the comment below to include it)
# vendor/

tests/
14 changes: 11 additions & 3 deletions goscraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ type ScrapeBuilder interface {
SetMaxDocumentLength(int64) ScrapeBuilder
SetUrl(string) ScrapeBuilder
SetMaxRedirect(int) ScrapeBuilder
Build() (*Scraper, error)
Build() (ScrapeService, error)
}

// scrapeBuilder is the unexported type that NewScrapeBuilder hands back as a
// ScrapeBuilder. It carries the scrapeSettings that Build reads (Build parses
// scrapeSettings.url).
type scrapeBuilder struct {
scrapeSettings scrapeSettings
}

func (b *scrapeBuilder) Build() (*Scraper, error) {
func (b *scrapeBuilder) Build() (ScrapeService, error) {
u, err := url.Parse(b.scrapeSettings.url)
if err != nil {
return nil, err
Expand Down Expand Up @@ -73,7 +73,9 @@ func (b *scrapeBuilder) SetUserAgent(s string) ScrapeBuilder {
}

// NewScrapeBuilder returns a ScrapeBuilder backed by a new scrapeBuilder whose
// settings start with the user agent "GoScraper".
func NewScrapeBuilder() ScrapeBuilder {
// NOTE(review): this text is a rendered diff with the +/- markers stripped.
// The single-line return directly below looks like the removed version and
// the keyed literal after it looks like its replacement; both build the same
// value. Confirm against the actual file.
return &scrapeBuilder{scrapeSettings{userAgent: "GoScraper"}}
return &scrapeBuilder{
scrapeSettings: scrapeSettings{userAgent: "GoScraper"},
}
}

type ScraperOptions struct {
Expand Down Expand Up @@ -108,6 +110,12 @@ type DocumentPreview struct {
Link string
}

// ScrapeService is the interface that Build on ScrapeBuilder returns. Every
// method yields a *Document together with an error.
// NOTE(review): the implementing type and the exact behavior of each method
// are not visible in this view; confirm before relying on the names alone.
type ScrapeService interface {
// Scrape takes no arguments and returns a *Document or an error.
Scrape() (*Document, error)
// GetDocument takes no arguments and returns a *Document or an error.
GetDocument() (*Document, error)
// ParseDocument receives a *Document and returns a *Document or an error.
// NOTE(review): whether doc is modified in place or copied is not shown here.
ParseDocument(doc *Document) (*Document, error)
}

func Scrape(uri string, maxRedirect int, options ScraperOptions) (*Document, error) {
u, err := url.Parse(uri)
if err != nil {
Expand Down

0 comments on commit afaa757

Please sign in to comment.