diff --git a/lib/protocol/http1/connection.rb b/lib/protocol/http1/connection.rb index feb451a..c2e5b26 100644 --- a/lib/protocol/http1/connection.rb +++ b/lib/protocol/http1/connection.rb @@ -37,8 +37,11 @@ module HTTP1 # HTTP/1.x header parser: FIELD_NAME = TOKEN - FIELD_VALUE = /[^\000-\037]*/.freeze - HEADER = /\A(#{FIELD_NAME}):\s*(#{FIELD_VALUE})\s*\z/.freeze + WS = /[ \t]/ # Whitespace. + OWS = /#{WS}*/ # Optional whitespace. + VCHAR = /[!-~]/ # Match visible characters from ASCII 33 to 126. + FIELD_VALUE = /#{VCHAR}+(?:#{WS}+#{VCHAR}+)*/.freeze + HEADER = /\A(#{FIELD_NAME}):#{OWS}(?:(#{FIELD_VALUE})#{OWS})?\z/.freeze VALID_FIELD_NAME = /\A#{FIELD_NAME}\z/.freeze VALID_FIELD_VALUE = /\A#{FIELD_VALUE}\z/.freeze @@ -484,7 +487,7 @@ def read_headers break if line.empty? if match = line.match(HEADER) - fields << [match[1], match[2]] + fields << [match[1], match[2] || ""] else raise BadHeader, "Could not parse header: #{line.inspect}" end diff --git a/test/protocol/http1/connection/headers.rb b/test/protocol/http1/connection/headers.rb new file mode 100644 index 0000000..1ac8b72 --- /dev/null +++ b/test/protocol/http1/connection/headers.rb @@ -0,0 +1,106 @@ +# frozen_string_literal: true + +# Released under the MIT License. +# Copyright, 2025, by Samuel Williams. + +require "protocol/http1/connection" +require "connection_context" + +describe Protocol::HTTP1::Connection do + include_context ConnectionContext + + let(:headers) {Array.new} + + before do + client.stream.write "GET / HTTP/1.1\r\nHost: localhost\r\n#{headers.join("\r\n")}\r\n\r\n" + client.stream.close + end + + with "header that contains tab characters" do + let(:headers) {[ + "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) \t\t\tChrome/55.0.2883.95 Safari/537.36" + ]} + + it "can parse the header" do + authority, method, target, version, headers, body = server.read_request + + expect(headers).to have_keys( + "user-agent" => be == "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) \t\t\tChrome/55.0.2883.95 Safari/537.36" + ) + end + end + + with "header that contains obsolete folding whitespace" do + let(:headers) {[ + "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko)\n\tChrome/55.0.2883.95 Safari/537.36" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end + + with "header that contains invalid characters" do + let(:headers) {[ + "user-agent: Mozilla\x00Hacker Browser" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end + + with "header that contains invalid high characters" do + let(:headers) {[ + "user-agent: Mozilla\x7FHacker Browser" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end + + with "header that has empty value" do + let(:headers) {[ + "user-agent: " + ]} + + it "can parse the header" do + authority, method, target, version, headers, body = server.read_request + + expect(headers).to have_keys( + "user-agent" => be == "" + ) + end + end + + with "header that has invalid name" do + let(:headers) {[ + "invalid name: value" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end + + with "header that has empty name" do + let(:headers) {[ + ": value" + ]} + + it "rejects the request" do + expect do + server.read_request + end.to raise_exception(Protocol::HTTP1::BadHeader) + end + end +end diff --git a/test/protocol/http1/parser.rb b/test/protocol/http1/parser.rb new file mode 100644 index 0000000..5bcecde --- /dev/null +++ b/test/protocol/http1/parser.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +# Released under the MIT License. +# Copyright, 2025, by Samuel Williams. + +require "protocol/http1/connection" + +describe Protocol::HTTP1 do + describe "REQUEST_LINE" do + it "parses in linear time" do + skip_unless_method_defined(:linear_time?, Regexp.singleton_class) + + expect(Regexp).to be(:linear_time?, Protocol::HTTP1::REQUEST_LINE) + end + end + + describe "HEADER" do + it "parses in linear time" do + skip_unless_method_defined(:linear_time?, Regexp.singleton_class) + + expect(Regexp).to be(:linear_time?, Protocol::HTTP1::HEADER) + end + end + + describe "VALID_FIELD_NAME" do + it "parses in linear time" do + skip_unless_method_defined(:linear_time?, Regexp.singleton_class) + + expect(Regexp).to be(:linear_time?, Protocol::HTTP1::VALID_FIELD_NAME) + end + end + + describe "VALID_FIELD_VALUE" do + it "parses in linear time" do + skip_unless_method_defined(:linear_time?, Regexp.singleton_class) + + expect(Regexp).to be(:linear_time?, Protocol::HTTP1::VALID_FIELD_VALUE) + end + end +end