class RuboCop::StringUtil::Jaro

This class computes Jaro distance, which is a measure of similarity between two strings.

Attributes

longer[R]
shorter[R]

Public Class Methods

distance(*args) click to toggle source
# File lib/rubocop/string_util.rb, line 16
# Convenience entry point: builds an instance from the given arguments and
# returns whatever that instance's #distance returns.
def self.distance(*args)
  instance = new(*args)
  instance.distance
end
new(a, b) click to toggle source
# File lib/rubocop/string_util.rb, line 20
# Stores the two inputs ordered by size. When both have the same size,
# `b` is kept as the shorter one and `a` as the longer one.
def initialize(a, b)
  @shorter, @longer = a.size < b.size ? [a, b] : [b, a]
end

Public Instance Methods

distance() click to toggle source
# File lib/rubocop/string_util.rb, line 30
# Returns the value produced by compute_distance, caching it in @distance
# so later calls reuse the stored result.
def distance
  return @distance if @distance

  @distance = compute_distance
end

Private Instance Methods

compute_distance() click to toggle source
# File lib/rubocop/string_util.rb, line 36
# Gathers the matched characters of both strings and turns them into a
# score. Returns 0.0 when nothing matched; otherwise hands the match count
# (as a Float) and the transposition count to compute_non_zero_distance.
def compute_distance
  matches_in_a, matches_in_b = find_common_characters
  matches = matches_in_a.size
  return 0.0 if matches.zero?

  compute_non_zero_distance(
    matches.to_f,
    count_transpositions(matches_in_a, matches_in_b)
  )
end
compute_non_zero_distance(matched_count, transposition_count) click to toggle source
# File lib/rubocop/string_util.rb, line 79
# Averages three ratios: matches over the shorter string's size, matches
# over the longer string's size, and the share of matches left after
# subtracting half the transposition count.
#
# NOTE(review): `transposition_count / 2` is integer division when the count
# is an Integer, so an odd count is rounded down.
def compute_non_zero_distance(matched_count, transposition_count)
  shorter_ratio = matched_count / shorter.size.to_f
  longer_ratio = matched_count / longer.size.to_f
  half_transpositions = transposition_count / 2
  order_ratio = (matched_count - half_transpositions) / matched_count

  (shorter_ratio + longer_ratio + order_ratio) / 3.0
end
count_transpositions(common_chars_a, common_chars_b) click to toggle source

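Counts the positions at which the two lists of matched characters differ. (The source comment attached here is the linter directive `rubocop:enable Metrics/AbcSize`, not a description of this method.)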

# File lib/rubocop/string_util.rb, line 73
# Counts the positions, over the length of common_chars_a, where the two
# lists hold different values. A position missing from common_chars_b
# compares as nil.
def count_transpositions(common_chars_a, common_chars_b)
  common_chars_a.zip(common_chars_b).count do |char_a, char_b|
    char_a != char_b
  end
end
find_common_characters() click to toggle source

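Finds the characters that the shorter and longer strings have in common within the matching window. (The source comment attached here is the linter directive `rubocop:disable Metrics/AbcSize`, not a description of this method.)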

# File lib/rubocop/string_util.rb, line 49
# Finds the characters shared by `shorter` and `longer`.
#
# Each character of `shorter` is paired with the first equal, not yet used
# character of `longer` whose index lies in matching_index_range for that
# position. Returns a two-element array: the matched characters in the order
# they occur in `shorter`, and the matched characters in the order they
# occur in `longer`. Neither input string is modified.
def find_common_characters
  # String#chars builds a new array on every call, so the characters are
  # materialized once; "used" marks written into this array then persist
  # across iterations.
  longer_chars = longer.chars
  common_chars_of_shorter = Array.new(shorter.size)
  common_chars_of_longer = Array.new(longer.size)

  shorter.each_char.with_index do |shorter_char, shorter_index|
    # Indexes past the end of `longer`, and already used slots, read as nil
    # and therefore never equal a character.
    longer_index = matching_index_range(shorter_index).find do |index|
      longer_chars[index] == shorter_char
    end
    next unless longer_index

    common_chars_of_shorter[shorter_index] = shorter_char
    common_chars_of_longer[longer_index] = shorter_char

    # Mark the matching character as already used
    longer_chars[longer_index] = nil
  end

  [common_chars_of_shorter, common_chars_of_longer].map(&:compact)
end
matching_index_range(origin) click to toggle source
# File lib/rubocop/string_util.rb, line 87
# Range of indexes considered a possible match for the character at
# `origin`: matching_window positions to either side, with the lower bound
# clamped at 0. The upper bound is not clamped.
def matching_index_range(origin)
  lower = [origin - matching_window, 0].max
  upper = origin + matching_window

  lower..upper
end
matching_window() click to toggle source
# File lib/rubocop/string_util.rb, line 96
# Maximum index offset at which two equal characters still count as a
# match: half the longer string's size (integer division) minus one,
# memoized in @matching_window.
#
# The value is clamped to 0 so that a longer string of size 0 or 1 does not
# produce a negative window, which would make every index range empty and
# prevent identical one-character strings from matching.
def matching_window
  @matching_window ||= [(longer.size / 2) - 1, 0].max
end