This repository has been archived by the owner on Jan 29, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathhtfollow.cc
115 lines (98 loc) · 2.79 KB
/
htfollow.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
* htFollow - follow an http url until content is reached - implementation
* Copyright(c) 2005-2018 of wave++ (Yuri D'Elia)
* Distributed under GNU LGPL without ANY warranty.
*/
// interface and local headers
#include "htfollow.hh"
#include "hdrparse.hh"
#include "sanitize.hh"
#include "msg.hh"
using std::string;
using std::map;
// system headers
#include <memory>
using std::auto_ptr;
#include <stdexcept>
using std::runtime_error;
// c system headers
#include <stdio.h>
#include <stdarg.h>
#include <unistd.h>
// implementation
// Render a signed integer as its decimal text representation.
string
itos(const int i)
{
  // 16 bytes are plenty for a 32-bit int (at most 11 characters plus the
  // terminator); snprintf never writes past the given size in any case.
  char digits[16];
  snprintf(digits, sizeof digits, "%d", i);

  string text(digits);
  return text;
}
/*
 * Follow an http url through redirections until content is reached.
 *
 * pReply:   overwritten with the parsed headers of every accepted reply; on
 *           return it holds the headers of the final (ok) reply
 * url:      starting location; its protocol is the only one a redirection
 *           is allowed to name
 * qHeaders: headers handed to every request
 * limit:    maximum number of redirections followed
 * timeout:  when non-zero, seconds passed as timeout to each connection
 * retries:  number of failed requests tolerated; the counter is shared by
 *           the whole redirection chain and is not reset after a success
 * waitSecs: seconds slept before retrying a failed request
 *
 * Returns the socket obtained from the request that was answered with
 * Http::Proto::ok. The pointer is released from the local auto_ptr, so the
 * caller becomes responsible for it.
 *
 * Throws runtime_error on an unexpected reply code, when the redirection
 * limit is hit, when a redirection carries no location or changes protocol.
 * Once the retries are exhausted, the exception raised by the failed request
 * is propagated unchanged (original dynamic type preserved).
 */
Socket*
htFollow(map<string, string>& pReply, const URL& url, const Http::Header qHeaders,
    size_t limit, time_t timeout, size_t retries, size_t waitSecs)
{
  URL buf = url;

  // tmBuf is only filled (and only handed to Http) when a timeout is requested
  timeval tmBuf;
  if(timeout)
  {
    tmBuf.tv_sec = timeout;
    tmBuf.tv_usec = 0;
  }

  // connection loop
  auto_ptr<Socket> s;
  for(size_t level = limit, retry = retries;;)
  {
    // display the correct port name/number
    if(!buf.port.size())
      buf.port = Http::Proto::port;

    msg("connecting to %s:%s", sanitize_esc(buf.server).c_str(),
        sanitize_esc(buf.port).c_str());
    Http::Http httpc(buf.server.c_str(), buf.port.c_str(), (timeout? &tmBuf: NULL));

    msg("requesting data on %s", sanitize_esc(buf.path).c_str());
    Http::Header aHeaders;
    Http::Reply reply(&aHeaders);
    try
    {
      // resetting the auto_ptr also disposes of the socket held from a
      // previous (redirected) request
      Socket* tmp = httpc.get(buf.path.c_str(), reply, &qHeaders);
      s.reset(tmp);
    }
    catch(const runtime_error& failure)
    {
      // out of retries: re-raise the *original* exception object. A plain
      // "throw failure;" would copy it as a runtime_error and slice away any
      // derived type.
      if(!retry--)
        throw;

      msg("request failure: %s", failure.what());
      sleep(waitSecs);

      // retry the same location; the redirection budget is not consumed
      continue;
    }

    // validate the reply code
    if(reply.code != Http::Proto::ok &&
        reply.code != Http::Proto::moved &&
        reply.code != Http::Proto::found &&
        reply.code != Http::Proto::other)
      throw runtime_error(string("unexpected reply: ") +
          itos(reply.code) + " " + sanitize_esc((reply.description.size()?
                  reply.description: reply.proto)).c_str());

    // parse the headers
    pReply = Http::hdrParse(aHeaders);
    if(reply.code == Http::Proto::ok)
      break;

    // recursion: every redirection consumes one level
    if(!level--)
      throw runtime_error(string("hit redirect follow limit: ") + itos(limit));

    map<string, string>::iterator urlPos = pReply.find(Http::Proto::location);
    if(urlPos == pReply.end())
      throw runtime_error("redirection didn't contain an url");

    if(reply.code == Http::Proto::moved)
      err("warning: content moved permanently to %s",
          sanitize_esc(urlPos->second).c_str());

    // continue from the redirection target, refusing to leave the protocol
    // of the original url
    buf = urlPos->second;
    if(buf.proto.size() && buf.proto != url.proto)
      throw runtime_error(
          string("protocol changes are not allowed in redirection (") +
          url.proto + " -> " + sanitize_esc(buf.proto) + ")");
  }

  return s.release();
}