summaryrefslogtreecommitdiff
path: root/app/models/ssh_host_key.rb
blob: 7e34988c7a014ef944141267a2c8472fdc7a8950 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# frozen_string_literal: true

# Detected SSH host keys are transiently stored in Redis
class SshHostKey
  # One host key line taken from known_hosts data, tagged with the position of
  # that line within the data it was read from.
  #
  # NOTE(review): `bits`, `fingerprint`, `type` and `valid?` are presumably
  # inherited from Gitlab::SSHPublicKey - confirm against that class.
  class Fingerprint < Gitlab::SSHPublicKey
    attr_reader :index

    # @param key [String] a single line of known_hosts data
    # @param index [Integer, nil] zero-based position of the line in its source data
    def initialize(key, index: nil)
      super(key)

      @index = index
    end

    # @return [Hash] the key's bits, fingerprint, type and line index
    def as_json(*)
      { bits: bits, fingerprint: fingerprint, type: type, index: index }
    end
  end

  include ReactiveCaching

  # Cache entries are keyed on the class name and the composite `id`.
  self.reactive_cache_key = ->(key) { [key.class.to_s, key.id] }

  # Do not refresh the data in the background - it is not expected to change.
  # This is achieved by making the lifetime shorter than the refresh interval.
  self.reactive_cache_refresh_interval = 15.minutes
  self.reactive_cache_lifetime = 10.minutes
  self.reactive_cache_work_type = :external_dependency

  # Rebuilds an instance from the composite identifier produced by `#id`
  # ("<project id>:<url>").
  #
  # @param opts [Hash] only `:id` is read (string or symbol key)
  # @return [SshHostKey, nil] nil when `:id` is missing or no project matches
  # @raise [ArgumentError] when a project is found but the URL part is not a
  #   valid ssh:// URL
  def self.find_by(opts = {})
    opts = HashWithIndifferentAccess.new(opts)
    return unless opts.key?(:id)

    # `to_s` keeps a nil or non-String id from raising NoMethodError on `split`;
    # such an id simply fails to match a project.
    project_id, url = opts[:id].to_s.split(':', 2)
    project = Project.find_by(id: project_id)

    project.presence && new(project: project, url: url)
  end

  # Builds a Fingerprint for every line of `data` and keeps the valid ones.
  # Each Fingerprint's index is the line's position in `data`, counted before
  # invalid lines are filtered out.
  #
  # @param data [String, Object] known_hosts data; anything that is not a
  #   String yields an empty list
  # @return [Array<Fingerprint>]
  def self.fingerprint_host_keys(data)
    return [] unless data.is_a?(String)

    data
      .each_line
      .each_with_index
      .map { |line, index| Fingerprint.new(line, index: index) }
      .select(&:valid?)
  end

  attr_reader :project, :url, :compare_host_keys

  # @param project [Project] project the host keys are detected for
  # @param url [String] ssh:// URL of the remote host
  # @param compare_host_keys [String, nil] known_hosts data to compare the
  #   detected keys against in `#host_keys_changed?`
  # @raise [ArgumentError] when `url` is not a valid ssh:// URL
  def initialize(project:, url:, compare_host_keys: nil)
    @project = project
    @url = normalize_url(url)
    @compare_host_keys = compare_host_keys
  end

  # Needed for reactive caching
  def self.primary_key
    :id
  end

  # Composite identifier: the project id and the normalized URL joined by ':'.
  # `find_by` splits on the first ':' only, so the URL part survives intact.
  def id
    [project.id, url].join(':')
  end

  # @return [Hash] change flag, fingerprints and raw known_hosts data
  def as_json(*)
    {
      host_keys_changed: host_keys_changed?,
      fingerprints: fingerprints,
      known_hosts: known_hosts
    }
  end

  # The `:known_hosts` entry of the cached scan result.
  # NOTE(review): presumably nil while the cache is not populated - confirm
  # against ReactiveCaching#with_reactive_cache.
  def known_hosts
    with_reactive_cache { |data| data[:known_hosts] }
  end

  # Valid fingerprints parsed from `known_hosts`, memoized per instance.
  def fingerprints
    @fingerprints ||= self.class.fingerprint_host_keys(known_hosts)
  end

  # Returns true if the known_hosts data differs from the version passed in at
  # initialization as `compare_host_keys`. Comments, blank lines, duplicates
  # and ordering are ignored; nil on either side compares as empty data.
  def host_keys_changed?
    cleanup(known_hosts) != cleanup(compare_host_keys)
  end

  # The `:error` entry of the cached scan result.
  def error
    with_reactive_cache { |data| data[:error] }
  end

  # Runs `ssh-keyscan` against the host and port of `url`, feeding the host on
  # STDIN (`-f-`).
  #
  # @return [Hash] `{ known_hosts: String }` on success, `{ error: String }`
  #   otherwise
  def calculate_reactive_cache
    # capture3 drains STDOUT and STDERR concurrently. Reading one pipe to EOF
    # before the other can deadlock once the child fills the unread pipe.
    stdout, stderr, status =
      Open3.capture3({}, *%W[ssh-keyscan -T 5 -p #{url.port} -f-], stdin_data: "#{url.host}\n")

    known_hosts = cleanup(stdout)
    errors = cleanup(stderr)

    # ssh-keyscan returns an exit code 0 in several error conditions, such as an
    # unknown hostname, so check both STDERR and the exit code
    if status.success? && errors.blank?
      { known_hosts: known_hosts }
    else
      Gitlab::AppLogger.debug("Failed to detect SSH host keys for #{id}: #{errors}")

      { error: 'Failed to detect SSH host keys' }
    end
  end

  private

  # Drops comment lines (starting with '#') and blank lines, removes duplicate
  # lines and sorts the remainder, so that two blobs of known_hosts data can be
  # compared regardless of ordering. nil is treated as empty data.
  #
  # @param data [String, nil]
  # @return [String]
  def cleanup(data)
    data
      .to_s
      .each_line
      .reject { |line| line.start_with?('#') || line.chomp.empty? }
      .uniq
      .sort
      .join
  end

  # Reduces `url` to scheme, host and port only, filling in the inferred port
  # when none was given.
  #
  # @param url [String, nil]
  # @return [Addressable::URI]
  # @raise [ArgumentError] when `url` cannot be parsed or its scheme is not ssh
  def normalize_url(url)
    full_url = ::Addressable::URI.parse(url)
    raise ArgumentError, 'Invalid URL' unless full_url&.scheme == 'ssh'

    ::Addressable::URI.parse("ssh://#{full_url.host}:#{full_url.inferred_port}")
  rescue ::Addressable::URI::InvalidURIError
    raise ArgumentError, 'Invalid URL'
  end
end