Skip to content

Commit

Permalink
feat: a simple middleman server
Browse files Browse the repository at this point in the history
  • Loading branch information
leosocy committed Jun 7, 2019
1 parent 6b5fa69 commit ea2a99c
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 34 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
[![Build Status](https://travis-ci.org/Leosocy/IntelliProxy.svg?branch=master)](https://travis-ci.org/Leosocy/IntelliProxy)
[![codecov](https://codecov.io/gh/Leosocy/IntelliProxy/branch/master/graph/badge.svg)](https://codecov.io/gh/Leosocy/IntelliProxy)

<p align="center">
client <--> middleman server <--> real proxy server <--> target host
<a href="https://github.com/Leosocy/IntelliProxy">
<img src="https://blog-images-1257621236.cos.ap-shanghai.myqcloud.com/IntelliProxy-MiddlemanServer.gif">
</a>
</p>

> - middleman: client <--request--> middleman server <--> real proxy server <--> internet
> - datasource: client <--RESTful api--> data source server
通过 Go 的高并发,周期性爬取大量免费的代理资源,进行质量筛选,并存储到 Storage 中,提供**稳定**、**实时**、**高可用**的 HTTP/HTTPS 代理。
Expand Down
28 changes: 24 additions & 4 deletions architecture.puml
Original file line number Diff line number Diff line change
@@ -1,7 +1,27 @@
@startuml
Alice -> Bob: Authentication Request
Bob --> Alice: Authentication Response

Alice -> Bob: Another authentication Request
Alice <-- Bob: another authentication Response
client -> middleman: HTTP Connect Method
middleman -> real_proxy: HTTP Connect Method
real_proxy -> target_host: TCP Connect

target_host --> real_proxy: TCP Establish
real_proxy --> middleman: HTTP Establish
middleman --> client: HTTP Establish

client -> middleman: TLS Handshake
middleman -> real_proxy: Copy Raw Data
real_proxy -> target_host: Copy Raw Data

target_host --> real_proxy: TLS Handshake Done
real_proxy --> middleman: Copy Raw Data
middleman --> client: Copy Raw Data

client -> middleman: Application Data
middleman -> real_proxy: Copy Raw Data
real_proxy -> target_host: Copy Raw Data

target_host --> real_proxy: Application Data
real_proxy --> middleman: Copy Raw Data
middleman --> client: Copy Raw Data

@enduml
9 changes: 6 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package main

import (
	"log"
	"net/http"

	"github.com/Leosocy/IntelliProxy/pkg/sched"
	"github.com/Leosocy/IntelliProxy/service/middleman"
)

func main() {
//scheduler := sched.NewScheduler()
//scheduler.Start()
middleman.ListenAndServe()
scheduler := sched.NewScheduler()
middlemanServer := middleman.NewServer(scheduler.GetStorage())
http.ListenAndServe("0.0.0.0:8081", middlemanServer)
scheduler.Start()
}
6 changes: 5 additions & 1 deletion pkg/sched/sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ func NewScheduler() *Scheduler {
return sc
}

// GetStorage returns the storage held by the scheduler.
func (sc *Scheduler) GetStorage() storage.Storage {
	return sc.storage
}

// Start opens the background crawling, detection, and inspection tasks,
// and receives and processes agents.
func (sc *Scheduler) Start() {
Expand Down Expand Up @@ -152,6 +156,6 @@ func (sc *Scheduler) bgCrawling(threshold uint) {
for _, s := range sc.spiders {
s.TryCrawl()
}
time.Sleep(5 * time.Minute)
time.Sleep(20 * time.Minute)
}
}
5 changes: 5 additions & 0 deletions pkg/selector/selector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright (c) 2019 leosocy, [email protected]
// Use of this source code is governed by a MIT-style license
// that can be found in the LICENSE file.

package selector
5 changes: 5 additions & 0 deletions pkg/selector/strategy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright (c) 2019 leosocy, [email protected]
// Use of this source code is governed by a MIT-style license
// that can be found in the LICENSE file.

package selector
101 changes: 76 additions & 25 deletions service/middleman/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,94 @@
package middleman

import (
"github.com/Sirupsen/logrus"
"bufio"
"errors"
"github.com/Leosocy/IntelliProxy/pkg/storage"
"github.com/elazarl/goproxy"
"io/ioutil"
"math/rand"
"net"
"net/http"
"net/url"
"strings"
"time"
)

type ProxyConn struct {
// connectDialFunc opens a connection to address on the given network.
// It has the same signature as net.Dial.
type connectDialFunc func(network, address string) (net.Conn, error)

// proxyURLGetter returns the URL of the proxy server to route through,
// or an error.
type proxyURLGetter func() (*url.URL, error)

// Server is a middleman between client and proxy server.
// It embeds goproxy.ProxyHttpServer, so the proxy server's fields and
// methods are available directly on Server.
type Server struct {
	// storage is queried by getProxyURL for candidate proxies.
	storage storage.Storage
	*goproxy.ProxyHttpServer
}

type PooledProxyConns struct {
// httpConnectDialToProxyHandler adapts pg to the signature expected by
// http.Transport.Proxy. The returned function ignores the request and simply
// forwards whatever pg returns.
func httpConnectDialToProxyHandler(pg proxyURLGetter) func(r *http.Request) (*url.URL, error) {
	pickProxy := func(_ *http.Request) (*url.URL, error) {
		proxyURL, err := pg()
		return proxyURL, err
	}
	return pickProxy
}

// httpsConnectDialToProxyHandler returns a dial function that reaches addr
// through an HTTP proxy by issuing a CONNECT request.
//
// On every call the returned function asks pg for a proxy URL, dials the
// proxy host with dial (appending ":80" when the host contains no colon),
// sends "CONNECT addr" and reads the proxy's reply. A 200 reply yields the
// open connection. On any failure after the proxy has been dialed, the
// connection is closed before the error is returned.
func httpsConnectDialToProxyHandler(pg proxyURLGetter, dial connectDialFunc) connectDialFunc {
	return func(network, addr string) (net.Conn, error) {
		proxyURL, err := pg()
		if err != nil {
			return nil, err
		}
		// Work on a copy of the host so the URL handed out by pg is not
		// modified.
		proxyAddr := proxyURL.Host
		if strings.IndexRune(proxyAddr, ':') == -1 {
			proxyAddr += ":80"
		}
		connectReq := &http.Request{
			Method: http.MethodConnect,
			URL:    &url.URL{Opaque: addr},
			Host:   addr,
			Header: make(http.Header),
		}
		c, err := dial(network, proxyAddr)
		if err != nil {
			return nil, err
		}
		// A failed write means the proxy never saw the CONNECT request, so
		// there is no point in waiting for a reply.
		if err := connectReq.Write(c); err != nil {
			c.Close()
			return nil, err
		}
		// Read response.
		// Okay to use and discard buffered reader here, because
		// TLS server will not speak until spoken to.
		br := bufio.NewReader(c)
		resp, err := http.ReadResponse(br, connectReq)
		if err != nil {
			c.Close()
			return nil, err
		}
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			// The body is read from c, so read it first and close c
			// afterwards, whether or not the read succeeded.
			body, err := ioutil.ReadAll(resp.Body)
			c.Close()
			if err != nil {
				return nil, err
			}
			return nil, errors.New("proxy refused connection" + string(body))
		}
		return c, nil
	}
}

// ProxyServer is a middleman between client and proxy server
// client <--> middleman <--> proxy server <--> destination
type ProxyServer struct {
// 对goproxy.ProxyHttpServer封装,
// NewServer creates a middleman Server that draws its proxies from storage.
//
// It wraps a fresh goproxy.ProxyHttpServer, seeds math/rand with the current
// time, and routes both plain HTTP requests (via Tr.Proxy) and CONNECT
// tunnels (via ConnectDial) through the proxy chosen by getProxyURL.
func NewServer(storage storage.Storage) *Server {
	srv := &Server{
		storage:         storage,
		ProxyHttpServer: goproxy.NewProxyHttpServer(),
	}

	// Seed the global generator used by getProxyURL to pick a proxy.
	now := time.Now()
	rand.Seed(now.UnixNano())

	pickProxy := srv.getProxyURL
	srv.Tr.Proxy = httpConnectDialToProxyHandler(pickProxy)
	srv.ConnectDial = httpsConnectDialToProxyHandler(pickProxy, net.Dial)
	return srv
}

func ListenAndServe() {
middleman := goproxy.NewProxyHttpServer()
middleman.Verbose = true
proxy := "http://122.193.246.140:9999"
// for request to http://xxx,middleman<->proxy server连接默认keepalive,
// 好像默认75s,改天查一下。
middleman.Tr.Proxy = func(request *http.Request) (*url.URL, error) {
return url.Parse(proxy)
// getProxyURL picks one proxy at random from the entries returned by
// storage.TopK(20) and returns its parsed URL.
//
// It returns an error when storage has no proxy to offer, or when the chosen
// proxy's URL cannot be parsed.
func (s *Server) getProxyURL() (*url.URL, error) {
	// candidateCount is how many entries are requested from storage.
	const candidateCount = 20

	proxies := s.storage.TopK(candidateCount)
	if len(proxies) == 0 {
		return nil, errors.New("no proxy available")
	}
	// rand.Intn yields a uniform index in [0, len(proxies)) directly,
	// without the modulo bias of rand.Int() % n.
	index := rand.Intn(len(proxies))
	return url.Parse(proxies[index].URL())
}

0 comments on commit ea2a99c

Please sign in to comment.