-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
5,680 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package amazon | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
"fmt" | ||
"io" | ||
"net/http" | ||
"net/url" | ||
"regexp" | ||
"strconv" | ||
"strings" | ||
"time" | ||
) | ||
|
||
// amazonURL is the search URL prefix to which the URL-encoded title is
// appended as the quicksearch_keyword value. Despite the name, the endpoint is
// the blu-ray.com quick search with the country fixed to UK. Example of a
// complete query:
// https://www.blu-ray.com/search/?quicksearch=1&quicksearch_country=UK&quicksearch_keyword=chaos+theory&section=bluraymovies
const amazonURL = "https://www.blu-ray.com/search/?quicksearch=1&quicksearch_country=UK&quicksearch_keyword="
|
||
// searchResult is one movie entry extracted from a search results page.
type searchResult struct {
	// title is the listing title; url is the link to the product page.
	title, url string
	// formats lists the media formats of the entry, e.g. "Blu-ray".
	formats []string
	// year is kept as text exactly as it appeared in the listing.
	year string
}
|
||
func SearchAmazon(title, year string) (hit bool, returnURL string, formats []string) { | ||
urlEncodedTitle := url.QueryEscape(title) | ||
amazonURL := fmt.Sprintf("%s%s%s", amazonURL, urlEncodedTitle, "§ion=bluraymovies") | ||
req, err := http.NewRequestWithContext(context.Background(), "GET", amazonURL, bytes.NewBuffer([]byte{})) | ||
|
||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") | ||
if err != nil { | ||
fmt.Println("Error creating request:", err) | ||
return false, "", nil | ||
} | ||
|
||
client := &http.Client{} | ||
resp, err := client.Do(req) | ||
if err != nil { | ||
fmt.Println("Error sending request:", err) | ||
return false, "", nil | ||
} | ||
|
||
defer resp.Body.Close() | ||
|
||
body, err := io.ReadAll(resp.Body) | ||
if err != nil { | ||
fmt.Println("Error reading response body:", err) | ||
return false, "", nil | ||
} | ||
rawData := string(body) | ||
moviesFound := findMoviesInResponse(rawData) | ||
if len(moviesFound) > 0 { | ||
return matchTitle(title, year, moviesFound) | ||
} | ||
return false, "", nil | ||
} | ||
|
||
func findMoviesInResponse(response string) (results []searchResult) { | ||
// look for the movies in the response | ||
// will be surrounded by <li class="clearfix"> and </li> | ||
// the url will be in the href attribute of the <a> tag | ||
// the title will be in the <a> tag | ||
|
||
// Find the start and end index of the movie entry | ||
for { | ||
startIndex := strings.Index(response, `<a class="hoverlink" data-globalproductid=`) | ||
// remove everything before the start index | ||
if startIndex == -1 { | ||
break | ||
} | ||
response = response[startIndex:] | ||
endIndex := strings.Index(response, `</div></div>`) | ||
|
||
// If both start and end index are found | ||
if endIndex != -1 { | ||
// Extract the movie entry | ||
movieEntry := response[0:endIndex] | ||
|
||
//fmt.Println(movieEntry) | ||
// Find the URL of the movie | ||
urlStartIndex := strings.Index(movieEntry, "href=\"") + len("href=\"") | ||
urlEndIndex := strings.Index(movieEntry[urlStartIndex:], "\"") + urlStartIndex | ||
returnURL := movieEntry[urlStartIndex:urlEndIndex] | ||
// Find the title of the movie | ||
r := regexp.MustCompile(`title="(.*?)\s*\((.*?)\)"`) | ||
// Find the first match | ||
match := r.FindStringSubmatch(movieEntry) | ||
|
||
if match != nil { | ||
// Extract and print title and year | ||
foundTitle := match[1] | ||
year := match[2] | ||
// Find the formats of the movie | ||
// if the title ends with 4k, then it is 4k | ||
var format string | ||
if strings.HasSuffix(foundTitle, "4K") { | ||
format = "4K Blu-ray" | ||
} else { | ||
format = "Blu-ray" | ||
} | ||
|
||
results = append(results, searchResult{title: foundTitle, year: year, url: returnURL, formats: []string{format}}) | ||
} | ||
// remove the movie entry from the response | ||
response = response[endIndex:] | ||
} else { | ||
break | ||
} | ||
} | ||
|
||
return results | ||
} | ||
|
||
func matchTitle(title, year string, results []searchResult) (hit bool, returnURL string, formats []string) { | ||
expectedYear := yearToDate(year) | ||
for _, result := range results { | ||
// normally a match if the year is within 1 year of each other | ||
resultYear := yearToDate(result.year) | ||
if result.title == title && (resultYear.Year() == expectedYear.Year() || | ||
resultYear.Year() == expectedYear.Year()-1 || resultYear.Year() == expectedYear.Year()+1) { | ||
return true, result.url, result.formats | ||
} | ||
} | ||
return false, "", nil | ||
} | ||
|
||
// yearToDate parses yearString as a decimal integer and returns midnight UTC
// on 1 January of that year. If yearString is not a valid integer it returns
// the zero time.Time.
func yearToDate(yearString string) time.Time {
	if parsed, err := strconv.Atoi(yearString); err == nil {
		return time.Date(parsed, time.January, 1, 0, 0, 0, 0, time.UTC)
	}
	return time.Time{}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package amazon | ||
|
||
import ( | ||
"os" | ||
"testing" | ||
"time" | ||
) | ||
|
||
func TestExtractMovieFormats(t *testing.T) { | ||
movieEntry := `<ul class="media-types"><li><span class="cpi-dvd cp-tab" title="DVD" data-json={"action":"media-format","filmId":0,"mediaTypeId":1}></span></li><li><span class="cpi-blu-ray cp-tab" title=" Blu-ray" data-json={"action":"media-format","filmId":0,"mediaTypeId":3}></span></li><li><span class="cpi-4-k cp-tab" title=" 4K Blu-ray" data-json={"action":"media-format","filmId":0,"mediaTypeId":14}></span></li></ul>` //nolint: lll | ||
|
||
expectedFormats := []string{"DVD", "Blu-ray", "4K Blu-ray"} | ||
formats := extractMovieFormats(movieEntry) | ||
|
||
if len(formats) != len(expectedFormats) { | ||
t.Errorf("Expected %d formats, but got %d", len(expectedFormats), len(formats)) | ||
} | ||
|
||
for i, format := range formats { | ||
if format != expectedFormats[i] { | ||
t.Errorf("Expected format %s, but got %s", expectedFormats[i], format) | ||
} | ||
} | ||
} | ||
|
||
func TestFindMoviesInResponse(t *testing.T) { | ||
// read response from testdata/cats.html | ||
rawdata, err := os.ReadFile("testdata/cats.html") | ||
if err != nil { | ||
t.Errorf("Error reading testdata/cats.html: %s", err) | ||
} | ||
|
||
searchResult := findMoviesInResponse(string(rawdata)) | ||
|
||
if len(searchResult) != 15 { | ||
t.Errorf("Expected 15 search result, but got %d", len(searchResult)) | ||
} | ||
|
||
if searchResult[0].title != "Cats" { | ||
t.Errorf("Expected title Cats, but got %s", searchResult[0].title) | ||
} | ||
if searchResult[0].year != "1998" { | ||
t.Errorf("Expected year 1998, but got %s", searchResult[0].year) | ||
} | ||
// check formats | ||
if searchResult[0].formats[0] != "DVD" { | ||
t.Errorf("Expected format DVD, but got %s", searchResult[0].formats[0]) | ||
} | ||
if searchResult[0].formats[1] != "Blu-ray" { | ||
t.Errorf("Expected format Blu-ray, but got %s", searchResult[0].formats[0]) | ||
} | ||
} | ||
|
||
func TestYearToDate(t *testing.T) { | ||
// Test case 1: Valid year string | ||
yearString := "2022" | ||
expectedDate := time.Date(2022, 1, 1, 0, 0, 0, 0, time.UTC) | ||
result := yearToDate(yearString) | ||
if result != expectedDate { | ||
t.Errorf("Expected date %v, but got %v", expectedDate, result) | ||
} | ||
|
||
// Test case 2: Invalid year string | ||
yearString = "abcd" | ||
expectedDate = time.Time{} | ||
result = yearToDate(yearString) | ||
if result != expectedDate { | ||
t.Errorf("Expected date %v, but got %v", expectedDate, result) | ||
} | ||
} | ||
func TestMatchTitle(t *testing.T) { //nolint: gocyclo | ||
results := []searchResult{ | ||
{title: "Cats", year: "1998", url: "https://example.com/cats", formats: []string{"DVD", "Blu-ray"}}, | ||
{title: "Dogs", year: "2000", url: "https://example.com/dogs", formats: []string{"DVD"}}, | ||
{title: "Birds", year: "2002", url: "https://example.com/birds", formats: []string{"Blu-ray"}}, | ||
} | ||
|
||
// Test case 1: Matching title and year | ||
hit, returnURL, formats := matchTitle("Cats", "1998", results) | ||
if !hit { | ||
t.Errorf("Expected hit to be true, but got false") | ||
} | ||
if returnURL != "https://example.com/cats" { | ||
t.Errorf("Expected returnURL to be 'https://example.com/cats', but got '%s'", returnURL) | ||
} | ||
expectedFormats := []string{"DVD", "Blu-ray"} | ||
if len(formats) != len(expectedFormats) { | ||
t.Errorf("Expected %d formats, but got %d", len(expectedFormats), len(formats)) | ||
} | ||
for i, format := range formats { | ||
if format != expectedFormats[i] { | ||
t.Errorf("Expected format %s, but got %s", expectedFormats[i], format) | ||
} | ||
} | ||
|
||
// Test case 2: Non-matching title | ||
hit, returnURL, formats = matchTitle("Dogs", "1998", results) | ||
if hit { | ||
t.Errorf("Expected hit to be false, but got true") | ||
} | ||
if returnURL != "" { | ||
t.Errorf("Expected returnURL to be empty, but got '%s'", returnURL) | ||
} | ||
if formats != nil { | ||
t.Errorf("Expected formats to be nil, but got %v", formats) | ||
} | ||
|
||
// Test case 3: Non-matching year | ||
hit, returnURL, formats = matchTitle("Cats", "2000", results) | ||
if hit { | ||
t.Errorf("Expected hit to be false, but got true") | ||
} | ||
if returnURL != "" { | ||
t.Errorf("Expected returnURL to be empty, but got '%s'", returnURL) | ||
} | ||
if formats != nil { | ||
t.Errorf("Expected formats to be nil, but got %v", formats) | ||
} | ||
|
||
// Test case 4: Matching title and year within 1 year difference | ||
hit, returnURL, formats = matchTitle("Cats", "1999", results) | ||
if !hit { | ||
t.Errorf("Expected hit to be true, but got false") | ||
} | ||
if returnURL != "https://example.com/cats" { | ||
t.Errorf("Expected returnURL to be 'https://example.com/cats', but got '%s'", returnURL) | ||
} | ||
if len(formats) != len(expectedFormats) { | ||
t.Errorf("Expected %d formats, but got %d", len(expectedFormats), len(formats)) | ||
} | ||
for i, format := range formats { | ||
if format != expectedFormats[i] { | ||
t.Errorf("Expected format %s, but got %s", expectedFormats[i], format) | ||
} | ||
} | ||
} |
Oops, something went wrong.