# frozen_string_literal: true require 'strscan' require_relative '../utils' require_relative '../bad_request' module Rack module Multipart class MultipartPartLimitError < Errno::EMFILE include BadRequest end class MultipartTotalPartLimitError < StandardError include BadRequest end # Use specific error class when parsing multipart request # that ends early. class EmptyContentError < ::EOFError include BadRequest end # Base class for multipart exceptions that do not subclass from # other exception classes for backwards compatibility. class BoundaryTooLongError < StandardError include BadRequest end # Prefer to use the BoundaryTooLongError class or Rack::BadRequest. Error = BoundaryTooLongError EOL = "\r\n" MULTIPART = %r|\Amultipart/.*boundary=\"?([^\";,]+)\"?|ni MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{EOL}/ni MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:(.*)(?=#{EOL}(\S|\z))/ni MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{EOL}]*)/ni class Parser BUFSIZE = 1_048_576 TEXT_PLAIN = "text/plain" TEMPFILE_FACTORY = lambda { |filename, content_type| extension = ::File.extname(filename.gsub("\0", '%00'))[0, 129] Tempfile.new(["RackMultipart", extension]) } class BoundedIO # :nodoc: def initialize(io, content_length) @io = io @content_length = content_length @cursor = 0 end def read(size, outbuf = nil) return if @cursor >= @content_length left = @content_length - @cursor str = if left < size @io.read left, outbuf else @io.read size, outbuf end if str @cursor += str.bytesize else # Raise an error for mismatching content-length and actual contents raise EOFError, "bad content body" end str end end MultipartInfo = Struct.new :params, :tmp_files EMPTY = MultipartInfo.new(nil, []) def self.parse_boundary(content_type) return unless content_type data = content_type.match(MULTIPART) return unless data data[1] end def self.parse(io, content_length, content_type, tmpfile, bufsize, qp) return EMPTY if 0 == content_length boundary = parse_boundary content_type return EMPTY unless boundary if boundary.length > 70 # RFC 1521 Section 7.2.1 imposes a 70 character maximum for the boundary. # Most clients use no more than 55 characters. raise BoundaryTooLongError, "multipart boundary size too large (#{boundary.length} characters)" end io = BoundedIO.new(io, content_length) if content_length parser = new(boundary, tmpfile, bufsize, qp) parser.parse(io) parser.result end class Collector class MimePart < Struct.new(:body, :head, :filename, :content_type, :name) def get_data data = body if filename == "" # filename is blank which means no file has been selected return elsif filename body.rewind if body.respond_to?(:rewind) # Take the basename of the upload's original filename. # This handles the full Windows paths given by Internet Explorer # (and perhaps other broken user agents) without affecting # those which give the lone filename. fn = filename.split(/[\/\\]/).last data = { filename: fn, type: content_type, name: name, tempfile: body, head: head } end yield data end end class BufferPart < MimePart def file?; false; end def close; end end class TempfilePart < MimePart def file?; true; end def close; body.close; end end include Enumerable def initialize(tempfile) @tempfile = tempfile @mime_parts = [] @open_files = 0 end def each @mime_parts.each { |part| yield part } end def on_mime_head(mime_index, head, filename, content_type, name) if filename body = @tempfile.call(filename, content_type) body.binmode if body.respond_to?(:binmode) klass = TempfilePart @open_files += 1 else body = String.new klass = BufferPart end @mime_parts[mime_index] = klass.new(body, head, filename, content_type, name) check_part_limits end def on_mime_body(mime_index, content) @mime_parts[mime_index].body << content end def on_mime_finish(mime_index) end private def check_part_limits file_limit = Utils.multipart_file_limit part_limit = Utils.multipart_total_part_limit if file_limit && file_limit > 0 if @open_files >= file_limit @mime_parts.each(&:close) raise MultipartPartLimitError, 'Maximum file multiparts in content reached' end end if part_limit && part_limit > 0 if @mime_parts.size >= part_limit @mime_parts.each(&:close) raise MultipartTotalPartLimitError, 'Maximum total multiparts in content reached' end end end end attr_reader :state def initialize(boundary, tempfile, bufsize, query_parser) @query_parser = query_parser @params = query_parser.make_params @bufsize = bufsize @state = :FAST_FORWARD @mime_index = 0 @collector = Collector.new tempfile @sbuf = StringScanner.new("".dup) @body_regex = /(?:#{EOL}|\A)--#{Regexp.quote(boundary)}(?:#{EOL}|--)/m @body_regex_at_end = /#{@body_regex}\z/m @rx_max_size = boundary.bytesize + 6 # (\r\n-- at start, either \r\n or -- at finish) @head_regex = /(.*?#{EOL})#{EOL}/m end def parse(io) outbuf = String.new read_data(io, outbuf) loop do status = case @state when :FAST_FORWARD handle_fast_forward when :CONSUME_TOKEN handle_consume_token when :MIME_HEAD handle_mime_head when :MIME_BODY handle_mime_body else # when :DONE return end read_data(io, outbuf) if status == :want_read end end def result @collector.each do |part| part.get_data do |data| tag_multipart_encoding(part.filename, part.content_type, part.name, data) @query_parser.normalize_params(@params, part.name, data) end end MultipartInfo.new @params.to_params_hash, @collector.find_all(&:file?).map(&:body) end private def dequote(str) # From WEBrick::HTTPUtils ret = (/\A"(.*)"\Z/ =~ str) ? $1 : str.dup ret.gsub!(/\\(.)/, "\\1") ret end def read_data(io, outbuf) content = io.read(@bufsize, outbuf) handle_empty_content!(content) @sbuf.concat(content) end # This handles the initial parser state. We read until we find the starting # boundary, then we can transition to the next state. If we find the ending # boundary, this is an invalid multipart upload, but keep scanning for opening # boundary in that case. If no boundary found, we need to keep reading data # and retry. It's highly unlikely the initial read will not consume the # boundary. The client would have to deliberately craft a response # with the opening boundary beyond the buffer size for that to happen. def handle_fast_forward while true case consume_boundary when :BOUNDARY # found opening boundary, transition to next state @state = :MIME_HEAD return when :END_BOUNDARY # invalid multipart upload, but retry for opening boundary else # no boundary found, keep reading data return :want_read end end end def handle_consume_token tok = consume_boundary # break if we're at the end of a buffer, but not if it is the end of a field @state = if tok == :END_BOUNDARY || (@sbuf.eos? && tok != :BOUNDARY) :DONE else :MIME_HEAD end end CONTENT_DISPOSITION_MAX_PARAMS = 16 CONTENT_DISPOSITION_MAX_BYTES = 1536 def handle_mime_head if @sbuf.scan_until(@head_regex) head = @sbuf[1] content_type = head[MULTIPART_CONTENT_TYPE, 1] if (disposition = head[MULTIPART_CONTENT_DISPOSITION, 1]) && disposition.bytesize <= CONTENT_DISPOSITION_MAX_BYTES # ignore actual content-disposition value (should always be form-data) i = disposition.index(';') disposition.slice!(0, i+1) param = nil num_params = 0 # Parse parameter list while i = disposition.index('=') # Only parse up to max parameters, to avoid potential denial of service num_params += 1 break if num_params > CONTENT_DISPOSITION_MAX_PARAMS # Found end of parameter name, ensure forward progress in loop param = disposition.slice!(0, i+1) # Remove ending equals and preceding whitespace from parameter name param.chomp!('=') param.lstrip! if disposition[0] == '"' # Parameter value is quoted, parse it, handling backslash escapes disposition.slice!(0, 1) value = String.new while i = disposition.index(/(["\\])/) c = $1 # Append all content until ending quote or escape value << disposition.slice!(0, i) # Remove either backslash or ending quote, # ensures forward progress in loop disposition.slice!(0, 1) # stop parsing parameter value if found ending quote break if c == '"' escaped_char = disposition.slice!(0, 1) if param == 'filename' && escaped_char != '"' # Possible IE uploaded filename, append both escape backslash and value value << c << escaped_char else # Other only append escaped value value << escaped_char end end else if i = disposition.index(';') # Parameter value unquoted (which may be invalid), value ends at semicolon value = disposition.slice!(0, i) else # If no ending semicolon, assume remainder of line is value and stop # parsing disposition.strip! value = disposition disposition = '' end end case param when 'name' name = value when 'filename' filename = value when 'filename*' filename_star = value # else # ignore other parameters end # skip trailing semicolon, to proceed to next parameter if i = disposition.index(';') disposition.slice!(0, i+1) end end else name = head[MULTIPART_CONTENT_ID, 1] end if filename_star encoding, _, filename = filename_star.split("'", 3) filename = normalize_filename(filename || '') filename.force_encoding(find_encoding(encoding)) elsif filename filename = $1 if filename =~ /^"(.*)"$/ filename = normalize_filename(filename) end if name.nil? || name.empty? name = filename || "#{content_type || TEXT_PLAIN}[]".dup end @collector.on_mime_head @mime_index, head, filename, content_type, name @state = :MIME_BODY else :want_read end end def handle_mime_body if (body_with_boundary = @sbuf.check_until(@body_regex)) # check but do not advance the pointer yet body = body_with_boundary.sub(@body_regex_at_end, '') # remove the boundary from the string @collector.on_mime_body @mime_index, body @sbuf.pos += body.length + 2 # skip \r\n after the content @state = :CONSUME_TOKEN @mime_index += 1 else # Save what we have so far if @rx_max_size < @sbuf.rest_size delta = @sbuf.rest_size - @rx_max_size @collector.on_mime_body @mime_index, @sbuf.peek(delta) @sbuf.pos += delta @sbuf.string = @sbuf.rest end :want_read end end # Scan until the we find the start or end of the boundary. # If we find it, return the appropriate symbol for the start or # end of the boundary. If we don't find the start or end of the # boundary, clear the buffer and return nil. def consume_boundary if read_buffer = @sbuf.scan_until(@body_regex) read_buffer.end_with?(EOL) ? :BOUNDARY : :END_BOUNDARY else @sbuf.terminate nil end end def normalize_filename(filename) if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) } filename = Utils.unescape_path(filename) end filename.scrub! filename.split(/[\/\\]/).last || String.new end CHARSET = "charset" deprecate_constant :CHARSET def tag_multipart_encoding(filename, content_type, name, body) name = name.to_s encoding = Encoding::UTF_8 name.force_encoding(encoding) return if filename if content_type list = content_type.split(';') type_subtype = list.first type_subtype.strip! if TEXT_PLAIN == type_subtype rest = list.drop 1 rest.each do |param| k, v = param.split('=', 2) k.strip! v.strip! v = v[1..-2] if v.start_with?('"') && v.end_with?('"') if k == "charset" encoding = find_encoding(v) end end end end name.force_encoding(encoding) body.force_encoding(encoding) end # Return the related Encoding object. However, because # enc is submitted by the user, it may be invalid, so # use a binary encoding in that case. def find_encoding(enc) Encoding.find enc rescue ArgumentError Encoding::BINARY end def handle_empty_content!(content) if content.nil? || content.empty? raise EmptyContentError end end end end end