diff --git a/README.md b/README.md index 49343f1..4dc1894 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ Production ready. * [new](#new) * [connect](#connect) +* [connect_proxy](#connect_proxy) +* [set_proxy_options](#set_proxy_options) * [set_timeout](#set_timeout) * [set_timeouts](#set_timeouts) * [ssl_handshake](#ssl_handshake) @@ -158,6 +160,24 @@ An optional Lua table can be specified as the last argument to this method to sp * `pool` : Specifies a custom name for the connection pool being used. If omitted, then the connection pool name will be generated from the string template `<host>:<port>` or `<unix-socket-path>`. +## connect_proxy + +`syntax: ok, err = httpc:connect_proxy(proxy_uri, scheme, host, port)` + +Attempts to connect to the web server through the given proxy server. The method accepts the following arguments: + +* `proxy_uri` - Full URI of the proxy server to use (e.g. `http://proxy.example.com:3128/`). Note: Only `http` protocol is supported. +* `scheme` - The protocol to use between the proxy server and the remote host (`http` or `https`). If `https` is specified as the scheme, `connect_proxy()` makes a `CONNECT` request to establish a TCP tunnel to the remote host through the proxy server. +* `host` - The hostname of the remote host to connect to. +* `port` - The port of the remote host to connect to. + +If an error occurs during the connection attempt, this method returns `nil` with a string describing the error. If the connection was successfully established, the method returns `1`. + +There are a few key points to keep in mind when using this API: + +* If the scheme is `https`, you need to perform the TLS handshake with the remote server manually using the `ssl_handshake()` method before sending any requests through the proxy tunnel. 
+* If the scheme is `http`, you need to ensure that the requests you send through the connection conform to [RFC 7230](https://tools.ietf.org/html/rfc7230) and especially [Section 5.3.2.](https://tools.ietf.org/html/rfc7230#section-5.3.2) which states that the request target must be in absolute form. In practice, this means that when you use `send_request()`, the `path` must be an absolute URI to the resource (e.g. `http://example.com/index.html` instead of just `/index.html`). + ## set_timeout `syntax: httpc:set_timeout(time)` @@ -192,6 +212,18 @@ Note that calling this instead of `close` is "safe" in that it will conditionall In case of success, returns `1`. In case of errors, returns `nil, err`. In the case where the conneciton is conditionally closed as described above, returns `2` and the error string `connection must be closed`. +## set_proxy_options + +`syntax: httpc:set_proxy_options(opts)` + +Configure an HTTP proxy to be used with this client instance. The `opts` is a table that accepts the following fields: + +* `http_proxy` - a URI to a proxy server to be used with http requests +* `https_proxy` - a URI to a proxy server to be used with https requests +* `no_proxy` - a comma-separated list of hosts that should not be proxied. + +Note that proxy options are only applied when using the high-level `request_uri()` API. + ## get_reused_times `syntax: times, err = httpc:get_reused_times()` @@ -232,7 +264,7 @@ When the request is successful, `res` will contain the following fields: * `status` The status code. * `reason` The status reason phrase. * `headers` A table of headers. Multiple headers with the same field name will be presented as a table of values. -* `has_body` A boolean flag indicating if there is a body to be read. +* `has_body` A boolean flag indicating if there is a body to be read. * `body_reader` An iterator function for reading the body in a streaming fashion. * `read_body` A method to read the entire body into a string. 
* `read_trailers` A method to merge any trailers underneath the headers, after reading the body. @@ -408,7 +440,7 @@ local res, err = httpc:request{ } ``` -If `sock` is specified, +If `sock` is specified, # Author diff --git a/lib/resty/http.lua b/lib/resty/http.lua index c59e6b6..b3cce43 100644 --- a/lib/resty/http.lua +++ b/lib/resty/http.lua @@ -15,6 +15,8 @@ local tbl_concat = table.concat local tbl_insert = table.insert local ngx_encode_args = ngx.encode_args local ngx_re_match = ngx.re.match +local ngx_re_gmatch = ngx.re.gmatch +local ngx_re_sub = ngx.re.sub local ngx_re_gsub = ngx.re.gsub local ngx_re_find = ngx.re.find local ngx_log = ngx.log @@ -98,7 +100,7 @@ end local _M = { - _VERSION = '0.11', + _VERSION = '0.12', } _M._USER_AGENT = "lua-resty-http/" .. _M._VERSION .. " (Lua) ngx_lua/" .. ngx.config.ngx_lua_version @@ -787,7 +789,6 @@ function _M.request_pipeline(self, requests) return responses end - function _M.request_uri(self, uri, params) params = tbl_copy(params or {}) -- Take by value @@ -800,11 +801,55 @@ function _M.request_uri(self, uri, params) if not params.path then params.path = path end if not params.query then params.query = query end - local c, err = self:connect(host, port) + -- See if we should use a proxy to make this request + local proxy_uri = self:get_proxy_uri(scheme, host) + + -- Make the connection either through the proxy or directly + -- to the remote host + local c, err + + if proxy_uri then + c, err = self:connect_proxy(proxy_uri, scheme, host, port) + else + c, err = self:connect(host, port) + end + if not c then return nil, err end + if proxy_uri then + if scheme == "http" then + -- When a proxy is used, the target URI must be in absolute-form + -- (RFC 7230, Section 5.3.2.). That is, it must be an absolute URI + -- to the remote resource with the scheme, host and an optional port + -- in place. 
+ -- + -- Since _format_request() constructs the request line by concatenating + -- params.path and params.query together, we need to modify the path + -- to also include the scheme, host and port so that the final form + -- in conformant to RFC 7230. + if port == 80 then + params.path = scheme .. "://" .. host .. path + else + params.path = scheme .. "://" .. host .. ":" .. port .. path + end + end + + if scheme == "https" then + -- don't keep this connection alive as the next request could target + -- any host and re-using the proxy tunnel for that is not possible + self.keepalive = false + end + + -- self:connect_uri() set the host and port to point to the proxy server. As + -- the connection to the proxy has been established, set the host and port + -- to point to the actual remote endpoint at the other end of the tunnel to + -- ensure the correct Host header added to the requests. + self.host = host + self.port = port + end + if scheme == "https" then local verify = true if params.ssl_verify == false then @@ -914,5 +959,106 @@ function _M.proxy_response(self, response, chunksize) until not chunk end +function _M.set_proxy_options(self, opts) + self.proxy_opts = tbl_copy(opts) -- Take by value +end + +function _M.get_proxy_uri(self, scheme, host) + if not self.proxy_opts then + return nil + end + + -- Check if the no_proxy option matches this host. Implementation adapted + -- from lua-http library (https://github.com/daurnimator/lua-http) + if self.proxy_opts.no_proxy then + if self.proxy_opts.no_proxy == "*" then + -- all hosts are excluded + return nil + end + + local no_proxy_set = {} + -- wget allows domains in no_proxy list to be prefixed by "." + -- e.g. no_proxy=.mit.edu + for host_suffix in ngx_re_gmatch(self.proxy_opts.no_proxy, "\\.?([^,]+)") do + no_proxy_set[host_suffix[1]] = true + end + + -- From curl docs: + -- matched as either a domain which contains the hostname, or the + -- hostname itself. 
For example local.com would match local.com, + -- local.com:80, and www.local.com, but not www.notlocal.com. + -- + -- Therefore, we keep stripping subdomains from the host, compare + -- them to the ones in the no_proxy list and continue until we find + -- a match or until there's only the TLD left + repeat + if no_proxy_set[host] then + return nil + end + + -- Strip the next level from the domain and check if that one + -- is on the list + host = ngx_re_sub(host, "^[^.]+\\.", "") + until not ngx_re_find(host, "\\.") + end + + if scheme == "http" and self.proxy_opts.http_proxy then + return self.proxy_opts.http_proxy + end + + if scheme == "https" and self.proxy_opts.https_proxy then + return self.proxy_opts.https_proxy + end + + return nil +end + + +function _M.connect_proxy(self, proxy_uri, scheme, host, port) + -- Parse the provided proxy URI + local parsed_proxy_uri, err = self:parse_uri(proxy_uri, false) + if not parsed_proxy_uri then + return nil, err + end + + -- Check that the scheme is http (https is not supported for + -- connections between the client and the proxy) + local proxy_scheme = parsed_proxy_uri[1] + if proxy_scheme ~= "http" then + return nil, "protocol " .. proxy_scheme .. " not supported for proxy connections" + end + + -- Make the connection to the given proxy + local proxy_host, proxy_port = parsed_proxy_uri[2], parsed_proxy_uri[3] + local c, err = self:connect(proxy_host, proxy_port) + if not c then + return nil, err + end + + if scheme == "https" then + -- Make a CONNECT request to create a tunnel to the destination through + -- the proxy. The request-target and the Host header must be in the + -- authority-form of RFC 7230 Section 5.3.3. See also RFC 7231 Section + -- 4.3.6 for more details about the CONNECT request + local destination = host .. ":" .. 
port + local res, err = self:request({ + method = "CONNECT", + path = destination, + headers = { + ["Host"] = destination + } + }) + + if not res then + return nil, err + end + + if res.status < 200 or res.status > 299 then + return nil, "failed to establish a tunnel through a proxy: " .. res.status + end + end + + return c, nil +end return _M diff --git a/lib/resty/http_headers.lua b/lib/resty/http_headers.lua index 6ff26fb..56069ec 100644 --- a/lib/resty/http_headers.lua +++ b/lib/resty/http_headers.lua @@ -4,7 +4,7 @@ local rawget, rawset, setmetatable = local str_lower = string.lower local _M = { - _VERSION = '0.11', + _VERSION = '0.12', } diff --git a/lua-resty-http-0.11-0.rockspec b/lua-resty-http-0.12-0.rockspec similarity index 93% rename from lua-resty-http-0.11-0.rockspec rename to lua-resty-http-0.12-0.rockspec index e2088ac..d727114 100644 --- a/lua-resty-http-0.11-0.rockspec +++ b/lua-resty-http-0.12-0.rockspec @@ -1,8 +1,8 @@ package = "lua-resty-http" -version = "0.11-0" +version = "0.12-0" source = { url = "git://github.com/pintsized/lua-resty-http", - tag = "v0.11" + tag = "v0.12" } description = { summary = "Lua HTTP client cosocket driver for OpenResty / ngx_lua.", diff --git a/t/14-host-header.t b/t/14-host-header.t index 62a6164..110a669 100644 --- a/t/14-host-header.t +++ b/t/14-host-header.t @@ -12,7 +12,7 @@ $ENV{TEST_COVERAGE} ||= 0; our $HttpConfig = qq{ lua_package_path "$pwd/lib/?.lua;/usr/local/share/lua/5.1/?.lua;;"; error_log logs/error.log debug; - resolver 8.8.8.8; + resolver 8.8.8.8 ipv6=off; init_by_lua_block { if $ENV{TEST_COVERAGE} == 1 then @@ -165,3 +165,30 @@ GET /a [error] --- response_body Unable to generate a useful Host header for a unix domain socket. Please provide one. 
+ +=== TEST 6: Host header is correct when http_proxy is used +--- http_config + lua_package_path "$TEST_NGINX_PWD/lib/?.lua;;"; + error_log logs/error.log debug; + resolver 8.8.8.8; + server { + listen *:8080; + } + +--- config + location /lua { + content_by_lua ' + local http = require "resty.http" + local httpc = http.new() + httpc:set_proxy_options({ + http_proxy = "http://127.0.0.1:8080" + }) + local res, err = httpc:request_uri("http://127.0.0.1:8081") + '; + } +--- request +GET /lua +--- no_error_log +[error] +--- error_log +Host: 127.0.0.1:8081 diff --git a/t/16-http-proxy.t b/t/16-http-proxy.t new file mode 100644 index 0000000..a0bdf39 --- /dev/null +++ b/t/16-http-proxy.t @@ -0,0 +1,297 @@ +use Test::Nginx::Socket; +use Cwd qw(cwd); + +plan tests => repeat_each() * (blocks() * 4); + +my $pwd = cwd(); + +$ENV{TEST_NGINX_RESOLVER} = '8.8.8.8'; +$ENV{TEST_NGINX_PWD} ||= $pwd; +$ENV{TEST_COVERAGE} ||= 0; + +our $HttpConfig = qq{ + lua_package_path "$pwd/lib/?.lua;/usr/local/share/lua/5.1/?.lua;;"; + error_log logs/error.log debug; + resolver 8.8.8.8; + + init_by_lua_block { + if $ENV{TEST_COVERAGE} == 1 then + jit.off() + require("luacov.runner").init() + end + } +}; + +no_long_string(); +run_tests(); + +__DATA__ +=== TEST 1: get_proxy_uri returns nil if proxy is not configured +--- http_config eval: $::HttpConfig +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + ngx.say(httpc:get_proxy_uri("http", "example.com")) + } + } +--- request +GET /lua +--- response_body +nil +--- no_error_log +[error] +[warn] + +=== TEST 2: get_proxy_uri matches no_proxy hosts correctly +--- http_config eval: $::HttpConfig +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + + -- helper that verifies get_proxy_uri works correctly with the given + -- scheme, host and no_proxy list + function test_no_proxy(scheme, host, no_proxy) + 
httpc:set_proxy_options({ + http_proxy = "http://http_proxy.example.com", + https_proxy = "http://https_proxy.example.com", + no_proxy = no_proxy + }) + + local proxy_uri = httpc:get_proxy_uri(scheme, host) + ngx.say("scheme: ", scheme, ", host: ", host, ", no_proxy: ", no_proxy, ", proxy_uri: ", proxy_uri) + end + + -- All these match the no_proxy list + test_no_proxy("http", "example.com", nil) + test_no_proxy("http", "example.com", "*") + test_no_proxy("http", "example.com", "example.com") + test_no_proxy("http", "sub.example.com", "example.com") + test_no_proxy("http", "example.com", "example.com,example.org") + test_no_proxy("http", "example.com", "example.org,example.com") + + -- Same for https for good measure + test_no_proxy("https", "example.com", nil) + test_no_proxy("https", "example.com", "*") + test_no_proxy("https", "example.com", "example.com") + test_no_proxy("https", "sub.example.com", "example.com") + test_no_proxy("https", "example.com", "example.com,example.org") + test_no_proxy("https", "example.com", "example.org,example.com") + + -- Edge cases + + -- example.com should match .example.com in the no_proxy list (legacy behavior of wget) + test_no_proxy("http", "example.com", ".example.com") + + -- notexample.com should not match example.com in the no_proxy list (not a subdomain) + test_no_proxy("http", "notexample.com", "example.com") + } + } +--- request +GET /lua +--- response_body +scheme: http, host: example.com, no_proxy: nil, proxy_uri: http://http_proxy.example.com +scheme: http, host: example.com, no_proxy: *, proxy_uri: nil +scheme: http, host: example.com, no_proxy: example.com, proxy_uri: nil +scheme: http, host: sub.example.com, no_proxy: example.com, proxy_uri: nil +scheme: http, host: example.com, no_proxy: example.com,example.org, proxy_uri: nil +scheme: http, host: example.com, no_proxy: example.org,example.com, proxy_uri: nil +scheme: https, host: example.com, no_proxy: nil, proxy_uri: http://https_proxy.example.com +scheme: 
https, host: example.com, no_proxy: *, proxy_uri: nil +scheme: https, host: example.com, no_proxy: example.com, proxy_uri: nil +scheme: https, host: sub.example.com, no_proxy: example.com, proxy_uri: nil +scheme: https, host: example.com, no_proxy: example.com,example.org, proxy_uri: nil +scheme: https, host: example.com, no_proxy: example.org,example.com, proxy_uri: nil +scheme: http, host: example.com, no_proxy: .example.com, proxy_uri: nil +scheme: http, host: notexample.com, no_proxy: example.com, proxy_uri: http://http_proxy.example.com +--- no_error_log +[error] +[warn] + +=== TEST 3: get_proxy_uri returns correct proxy URIs for http and https URIs +--- http_config eval: $::HttpConfig +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + + -- helper that configures the proxy opts as proived and checks what + -- get_proxy_uri says for the given scheme / host pair + function test_get_proxy_uri(scheme, host, http_proxy, https_proxy) + httpc:set_proxy_options({ + http_proxy = http_proxy, + https_proxy = https_proxy + }) + + local proxy_uri = httpc:get_proxy_uri(scheme, host) + ngx.say( + "scheme: ", scheme, + ", host: ", host, + ", http_proxy: ", http_proxy, + ", https_proxy: ", https_proxy, + ", proxy_uri: ", proxy_uri + ) + end + + -- http + test_get_proxy_uri("http", "example.com", "http_proxy", "https_proxy") + test_get_proxy_uri("http", "example.com", nil, "https_proxy") + + -- https + test_get_proxy_uri("https", "example.com", "http_proxy", "https_proxy") + test_get_proxy_uri("https", "example.com", "http_proxy", nil) + } + } +--- request +GET /lua +--- response_body +scheme: http, host: example.com, http_proxy: http_proxy, https_proxy: https_proxy, proxy_uri: http_proxy +scheme: http, host: example.com, http_proxy: nil, https_proxy: https_proxy, proxy_uri: nil +scheme: https, host: example.com, http_proxy: http_proxy, https_proxy: https_proxy, proxy_uri: https_proxy +scheme: https, host: 
example.com, http_proxy: http_proxy, https_proxy: nil, proxy_uri: nil +--- no_error_log +[error] +[warn] + +=== TEST 4: request_uri uses http_proxy correctly for non-standard destination ports +--- http_config + lua_package_path "$TEST_NGINX_PWD/lib/?.lua;;"; + error_log logs/error.log debug; + resolver 8.8.8.8; + server { + listen *:8080; + + location / { + content_by_lua_block { + ngx.print(ngx.req.raw_header()) + } + } + } +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + httpc:set_proxy_options({ + http_proxy = "http://127.0.0.1:8080", + https_proxy = "http://127.0.0.1:8080" + }) + + -- request should go to the proxy server + local res, err = httpc:request_uri("http://127.0.0.1:1234/target?a=1&b=2") + + if not res then + ngx.log(ngx.ERR, err) + return + end + ngx.status = res.status + ngx.say(res.body) + } + } +--- request +GET /lua +--- response_body_like +^GET http://127.0.0.1:1234/target\?a=1&b=2 HTTP/.+\r\nHost: 127.0.0.1:1234.+ +--- no_error_log +[error] +[warn] + +=== TEST 5: request_uri uses http_proxy correctly for standard destination port +--- http_config + lua_package_path "$TEST_NGINX_PWD/lib/?.lua;;"; + error_log logs/error.log debug; + resolver 8.8.8.8; + server { + listen *:8080; + + location / { + content_by_lua_block { + ngx.print(ngx.req.raw_header()) + } + } + } +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + httpc:set_proxy_options({ + http_proxy = "http://127.0.0.1:8080", + https_proxy = "http://127.0.0.1:8080" + }) + + -- request should go to the proxy server + local res, err = httpc:request_uri("http://127.0.0.1/target?a=1&b=2") + + if not res then + ngx.log(ngx.ERR, err) + return + end + + -- the proxy echoed the raw request header and we shall pass it onwards + -- to the test harness + ngx.status = res.status + ngx.say(res.body) + } + } +--- request +GET /lua +--- response_body_like +^GET 
http://127.0.0.1/target\?a=1&b=2 HTTP/.+\r\nHost: 127.0.0.1.+ +--- no_error_log +[error] +[warn] + +=== TEST 6: request_uri makes a proper CONNECT request when proxying https resources +--- http_config eval: $::HttpConfig +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + httpc:set_proxy_options({ + http_proxy = "http://127.0.0.1:12345", + https_proxy = "http://127.0.0.1:12345" + }) + + -- Slight Hack: temporarily change the module global user agent to make it + -- predictable for this test case + local ua = http._USER_AGENT + http._USER_AGENT = "test_ua" + local res, err = httpc:request_uri("https://127.0.0.1/target?a=1&b=2") + http._USER_AGENT = ua + + if not err then + -- The proxy request should fail as the TCP server listening returns + -- 403 response. We cannot really test the success case here as that + -- would require an actual reverse proxy to be implemented through + -- the limited functionality we have available in the raw TCP sockets + ngx.log(ngx.ERR, "unexpected success") + return + end + + ngx.status = 403 + ngx.say(err) + } + } +--- tcp_listen: 12345 +--- tcp_query eval +qr/CONNECT 127.0.0.1:443 HTTP\/1.1\r\n.*Host: 127.0.0.1:443\r\n.*/s + +# The reply cannot be successful or otherwise the client would start +# to do a TLS handshake with the proxied host and that we cannot +# do with these sockets +--- tcp_reply +HTTP/1.1 403 Forbidden +Connection: close + +--- request +GET /lua +--- error_code: 403 +--- no_error_log +[error] +[warn]