|
| 1 | +# frozen_string_literal: true |
| 2 | + |
1 | 3 | require 'active_support/multibyte/unicode' |
2 | 4 |
|
3 | 5 | module UTF8Cleaner |
4 | 6 | class Middleware |
5 | | - |
6 | | - SANITIZE_ENV_KEYS = [ |
7 | | - "HTTP_REFERER", |
8 | | - "HTTP_USER_AGENT", |
9 | | - "PATH_INFO", |
10 | | - "QUERY_STRING", |
11 | | - "REQUEST_PATH", |
12 | | - "REQUEST_URI", |
13 | | - "HTTP_COOKIE" |
14 | | - ] |
| 7 | + SANITIZE_ENV_KEYS = %w[ |
| 8 | + HTTP_REFERER |
| 9 | + HTTP_USER_AGENT |
| 10 | + PATH_INFO |
| 11 | + QUERY_STRING |
| 12 | + REQUEST_PATH |
| 13 | + REQUEST_URI |
| 14 | + HTTP_COOKIE |
| 15 | + ].freeze |
15 | 16 |
|
16 | 17 | def initialize(app) |
17 | | - @app = app |
| 18 | + @app = app |
18 | 19 | end |
19 | 20 |
|
20 | 21 | def call(env) |
21 | | - @app.call(sanitize_env(env)) |
| 22 | + @app.call(sanitize_env(env)) |
22 | 23 | end |
23 | 24 |
|
24 | 25 | private |
25 | 26 |
|
26 | 27 | include ActiveSupport::Multibyte::Unicode |
27 | 28 |
|
28 | 29 | def sanitize_env(env) |
| 30 | + env = env.dup # Do not mutate the original |
29 | 31 | sanitize_env_keys(env) |
30 | 32 | sanitize_env_rack_input(env) |
31 | 33 | env |
32 | 34 | end |
33 | 35 |
|
34 | 36 | def sanitize_env_keys(env) |
35 | 37 | SANITIZE_ENV_KEYS.each do |key| |
36 | | - next unless value = env[key] |
| 38 | + next unless (value = env[key]) |
37 | 39 | env[key] = cleaned_string(value) |
38 | 40 | end |
39 | 41 | end |
40 | 42 |
|
41 | 43 | def sanitize_env_rack_input(env) |
| 44 | + return unless env['rack.input'] |
| 45 | + |
42 | 46 | case env['CONTENT_TYPE'] |
43 | | - when 'application/x-www-form-urlencoded' |
| 47 | + when %r{\Aapplication/x-www-form-urlencoded}i |
44 | 48 | # This data gets the full cleaning treatment |
45 | | - cleaned_value = cleaned_string(env['rack.input'].read) |
46 | | - env['rack.input'] = StringIO.new(cleaned_value) if cleaned_value |
47 | | - env['rack.input'].rewind |
48 | | - when 'application/json' |
| 49 | + input_data = read_input(env['rack.input']) |
| 50 | + return unless input_data |
| 51 | + |
| 52 | + cleaned_value = cleaned_string(input_data) |
| 53 | + env['rack.input'] = StringIO.new(cleaned_value) |
| 54 | + when %r{\Aapplication/json}i |
49 | 55 | # This data only gets cleaning of invalid UTF-8 (e.g. from another charset) |
50 | 56 | # but we do not URI-decode it. |
51 | | - rack_input = env['rack.input'].read |
52 | | - if rack_input && !rack_input.ascii_only? |
53 | | - env['rack.input'] = StringIO.new(tidy_bytes(rack_input)) |
54 | | - end |
55 | | - env['rack.input'].rewind |
56 | | - when 'multipart/form-data' |
57 | | - # Don't process the data since it may contain binary content |
| 57 | + input_data = read_input(env['rack.input']) |
| 58 | + return unless input_data && !input_data.ascii_only? |
| 59 | + |
| 60 | + env['rack.input'] = StringIO.new(tidy_bytes(input_data)) |
58 | 61 | else |
59 | | - # Unknown content type. Leave it alone |
| 62 | + # Do not process multipart/form-data since it may contain binary content. |
| 63 | + # Leave all other unknown content types alone. |
60 | 64 | end |
61 | 65 | end |
62 | 66 |
|
| 67 | + def read_input(input) |
| 68 | + return nil unless input |
| 69 | + |
| 70 | + data = input.read |
| 71 | + input.rewind if input.respond_to?(:rewind) |
| 72 | + data |
| 73 | + end |
| 74 | + |
63 | 75 | def cleaned_string(value) |
| 76 | + return value if value.nil? || value.empty? |
| 77 | + |
| 78 | + value = value.to_s |
64 | 79 | value = tidy_bytes(value) unless value.ascii_only? |
65 | 80 | value = URIString.new(value).cleaned if value.include?('%') |
66 | 81 | value |
|
0 commit comments