Fix diff algo

webui
aeris 5 years ago
parent 34c403f81b
commit 6137d60ccf
  1. 24
      app/lib/http.rb
  2. 68
      app/models/check.rb
  3. 93
      app/models/site.rb
  4. 68
      bin/cli.rb
  5. 14
      spec/models/site_spec.rb

@ -57,9 +57,33 @@ class Http
protected
DATE_FORMAT = '%Y%m%d_%H%M%S'.freeze
# Store the body of +response+ under tmp/cache/http for later debugging.
#
# Does nothing unless the DEBUG_HTTP environment variable is set. Snapshots
# are named "<SHA-256 of @url>_<timestamp in DATE_FORMAT>"; a new snapshot is
# written only when the body differs from the latest one kept for this URL.
#
# @param response [#body] response whose body should be stored
# @return [Integer, nil] bytes written, or nil when nothing was stored
def cache(response)
  return unless ENV['DEBUG_HTTP']

  prefix = Digest::SHA256.hexdigest @url
  dir = File.join Rails.root, 'tmp', 'cache', 'http'
  # mkdir_p is idempotent, so no existence check is needed beforehand.
  FileUtils.mkdir_p dir

  body = response.body
  # DATE_FORMAT timestamps sort lexicographically, so the greatest name is
  # the most recent snapshot.
  last = Dir[File.join(dir, "#{prefix}_*")].max
  return if last && Digest::SHA256.file(last).hexdigest == Digest::SHA256.hexdigest(body)

  time = Time.now.strftime DATE_FORMAT
  File.binwrite File.join(dir, "#{prefix}_#{time}"), body
end
# Fetch @url through HTTParty with a 10.seconds timeout.
#
# A successful response is handed to #cache before being returned.
#
# @return [Object] the HTTParty response
# @raise [RuntimeError] "Receive <code>" when the response is not a success
def grab
  HTTParty.get(@url, timeout: 10.seconds).tap do |reply|
    raise "Receive #{reply.code}" unless reply.success?

    self.cache reply
  end
end

@ -2,72 +2,28 @@ class Check < ApplicationRecord
belongs_to :site
belongs_to :target
# Make the target's extraction of +content+ the new baseline for this check.
#
# The extracted value is stored as both reference and content, checked_at is
# set to the current time, and changed_at / last_error are cleared.
#
# @param content [Object] raw content handed to the target's #extract
def reference!(content)
  extracted = self.target.extract content
  self.update!(
    reference: extracted,
    content: extracted,
    checked_at: Time.now,
    changed_at: nil,
    last_error: nil
  )
end
# Compare the target's extraction of +content+ with the stored content and
# persist the outcome.
#
# checked_at is always refreshed. When the extraction differs, it replaces
# the stored content and changed_at takes the value of checked_at. Any
# StandardError raised along the way is printed to $stderr and stored in
# last_error instead of propagating. The record is saved in every case.
#
# @param content [Object] raw content handed to the target's #extract
# @param debug [Boolean] print Utils.diff of old vs new when they differ
# @return [Symbol] :unchanged, :changed or :error
def diff!(content, debug: false)
  self.checked_at = Time.now
  outcome =
    begin
      extractor = self.target
      previous = self.content
      content = extractor.extract content
      differs = previous != content
      if differs
        puts Utils.diff previous, content if debug
        self.content = content
        self.changed_at = self.checked_at
      end
      self.last_error = nil
      differs ? :changed : :unchanged
    rescue => e
      $stderr.puts e
      self.last_error = e
      :error
    end
  self.save!
  outcome
end
def diff(context: 3, **kwargs)
reference = self.reference
target = self.target
content = target.extract self.content
Diffy::Diff.new reference, content, context: context, **kwargs
def to_s
self.target.to_s
end
def recalculate!(debug: false)
state = :unchanged
def changed?(reference, content, debug: false)
target = self.target
reference = self.site.reference
content = self.site.content || reference
reference = target.extract reference
content = target.extract content
changed = reference != content
changed_at = self.changed_at
if reference == content
content = nil
changed_at = nil
else
if changed
puts Utils.diff reference, content if debug
state = :changed
changed_at ||= self.checked_at
return true
end
self.update! reference: reference, content: content, changed_at: changed_at
state
false
end
def clear!
self.update! reference: nil, content: nil, checked_at: nil, changed_at: nil, last_error: nil
def diff(reference, content, context: 3, **kwargs)
target = self.target
reference = target.extract reference
content = target.extract content
Diffy::Diff.new reference, content, context: context, **kwargs
end
end

@ -29,17 +29,21 @@ class Site < ApplicationRecord
end
end
# Wipe every piece of tracked state on this record: reference, content,
# checked_at, changed_at and last_error are all set to nil and saved.
def reset!
  self.update!(
    reference: nil,
    content: nil,
    checked_at: nil,
    changed_at: nil,
    last_error: nil
  )
end
# Make +content+ the new baseline: it is stored as both reference and
# content, checked_at is set to the current time, and changed_at /
# last_error are cleared. The same content is then passed to #reference!
# on each associated check.
#
# @param content [Object] the content to record as the reference
# @return [Object] the checks collection, as returned by #each
def reference!(content)
  self.update!(
    reference: content,
    content: content,
    checked_at: Time.now,
    changed_at: nil,
    last_error: nil
  )
  self.checks.each do |check|
    check.reference! content
  end
end
STATES = %i[unchanged changed error].freeze
# Mark the current content as read by promoting it to the reference.
# Does nothing (and returns nil) when there is no content.
def read!
  self.reference!(self.content) if self.content
end
def update_state(current, state)
current_index = STATES.index current
state_index = STATES.index state
current_index < state_index ? state : current
def clear!
self.update! content: nil, checked_at: Time.now, changed_at: nil, last_error: nil
end
def diff(context: 3, **kwargs)
@ -48,69 +52,70 @@ class Site < ApplicationRecord
Diffy::Diff.new reference, content, context: context, **kwargs
end
def diff!(content, debug: false)
# Tell whether +content+ counts as changed relative to +reference+.
#
# Without any checks the two values are compared directly (printing
# Utils.diff when +debug+ is set and they differ). Otherwise the question is
# delegated to the checks, stopping at the first one that reports a change.
#
# @param reference [Object] baseline content
# @param content [Object] content to compare against the baseline
# @param debug [Boolean] forwarded to the checks / enables diff output
# @return [Boolean]
def changed?(reference, content, debug: false)
  checks = self.checks
  if checks.empty?
    differs = reference != content
    puts Utils.diff reference, content if differs && debug
    return differs ? true : false
  end

  checks.any? { |check| check.changed? reference, content, debug: debug }
end
def diff!(reference, content, debug: false)
self.checked_at = Time.now
state = :unchanged
begin
reference = self.content
checks = self.checks
if checks.empty?
if reference != content
puts Utils.diff reference, content if debug
state = :changed
end
else
checks.each do |check|
check_state = check.diff! content, debug: debug
state = self.update_state state, check_state
end
end
if state == :changed
changed = self.changed? reference, content, debug: debug
if changed
self.content = content
self.changed_at = self.checked_at
state = :changed
end
self.last_error = nil
rescue => e
$stderr.puts e
self.last_error = e
state = :error
end
self.save!
state
end
def check(debug: false)
reference = self.reference
content = self.grab.body
unless reference
def check!(debug: false)
grab = self.grab
content = grab.body
self.update! name: grab.title unless self.name
unless self.reference
self.reference! content
return :reference
else
return self.diff! content, debug: debug
return self.diff! self.content, content, debug: debug
end
end
def recalculate!(debug: false)
state = :unchanged
reference = self.reference
content = self.content || reference
changed_at = self.changed_at
state = :unchanged
states = self.checks.collect { |c| c.recalculate! debug: debug }.uniq
state = :changed if states.include? :changed
if states.empty? && reference != content
state = :changed
puts Utils.diff reference, content if debug
end
if state == :changed
changed = self.checks.find { |c| c.changed? reference, content, debug: debug }
if changed
state = :changed
changed_at ||= self.checked_at
else
content = nil
changed_at = nil
end
@ -118,14 +123,4 @@ class Site < ApplicationRecord
state
end
# Promote the current content to the reference via #reference!.
# Returns nil without doing anything when no content is stored.
def read!
  self.reference!(self.content) if self.content
end
# Set reference, content, checked_at, changed_at and last_error to nil on
# this record, then call #clear! on each associated check.
#
# @return [Object] the checks collection, as returned by #each
def reset!
  self.update!(
    reference: nil,
    content: nil,
    checked_at: nil,
    changed_at: nil,
    last_error: nil
  )
  self.checks.each { |check| check.clear! }
end
end

@ -18,8 +18,8 @@ end
def display(item)
reference = item.reference
content = item.content
ap reference: fp(reference),
content: fp(content),
ap reference: fp(reference),
content: fp(content),
checked_at: item.checked_at,
changed_at: item.changed_at,
last_error: item.last_error
@ -30,36 +30,50 @@ def display(item)
end
class App < Thor
desc 'check', 'Check given sites for changes'
desc 'check <url>*', 'Check given sites for changes'
method_option :reset, type: :boolean, default: false, aliases: '-r', desc: 'Reset sites before check'
method_option :debug, type: :boolean, default: false, aliases: '-d', desc: 'Activate debug'
COLORS = {
reference: :blue,
unchanged: :green,
changed: :red,
error: { background: :red }
reference: :blue,
unchanged: :green,
changed: :red,
error: { background: :red }
}.freeze
def check(urls = nil)
reset = options[:reset]
debug = options[:debug]
results = Hash.new 0
self.process urls do |site|
site.reset! if reset
result = site.check debug: debug
color = COLORS[result]
result = site.check! debug: debug
results[result] += 1
color = COLORS[result]
result.to_s.colorize color
end
results.each do |k, v|
color = COLORS[k]
puts "#{k.to_s.colorize color}: #{v}"
end
end
desc 'read', 'Mark given sites as read'
desc 'read <url>*', 'Mark given sites as read'
# CLI command: call #read! on every site that #process yields for +urls+.
def read(urls = nil)
  self.process(urls) { |site| site.read! }
end
desc 'diff', 'Display diff of the given sites'
desc 'clear <url>*', 'Clear given sites'
# CLI command: call #clear! on every site that #process yields for +urls+.
def clear(urls = nil)
  self.process(urls) { |site| site.clear! }
end
desc 'diff <url>*', 'Display diff of the given sites'
def diff(urls = nil)
sites = self.sites urls
@ -76,16 +90,44 @@ class App < Thor
end
end
desc 'recalculate', 'Recalculate state of given sites'
desc 'recalculate <url>*', 'Recalculate state of given sites'
method_option :debug, type: :boolean, default: false, aliases: '-d', desc: 'Activate debug'
# CLI command: call #recalculate! on every site that #process yields for
# +urls+, then print how many sites ended up in each state.
#
# The block hands the colorized state back to #process; the summary lines
# are colorized with the matching COLORS entry.
def recalculate(urls = nil)
  debug = options[:debug]
  tally = Hash.new(0)
  self.process(urls) do |site|
    outcome = site.recalculate!(debug: debug)
    tally[outcome] += 1
    outcome.to_s.colorize(COLORS[outcome])
  end
  tally.each { |state, count| puts "#{state.to_s.colorize(COLORS[state])}: #{count}" }
end
desc 'reset <url>*', 'Reset state of given sites'
# CLI command: call #reset! on every site that #process yields for +urls+.
def reset(urls = nil)
  self.process(urls) { |site| site.reset! }
end
desc 'redo <url> <date1> <date2>', 'Redo check from cache'
# CLI command: replay a change check for +url+ from two cached snapshots.
#
# Snapshots are read from tmp/cache/http under Rails.root, from files named
# "<SHA-256 of url>_<date>". The first is used as the reference, the second
# as the content; the result of Site#changed? (with debug on) is printed
# with ap.
#
# @param url [String] URL of the site to look up
# @param date1 [String, nil] timestamp suffix of the reference snapshot
# @param date2 [String, nil] timestamp suffix of the content snapshot
def redo(url, date1 = nil, date2 = nil)
  site = Site.where(url: url).first
  digest = Digest::SHA256.hexdigest url
  cache_dir = File.join Rails.root, 'tmp/cache/http'
  reference, content = [date1, date2].map do |date|
    File.read File.join(cache_dir, "#{digest}_#{date}")
  end
  ap site.changed?(reference, content, debug: true)
end
protected
@ -103,8 +145,10 @@ class App < Thor
begin
result = yield site
puts "#{url} #{result}"
result
rescue => e
puts "#{url} #{e.to_s.colorize :red}"
nil
end
end
end

@ -24,7 +24,7 @@ RSpec.describe Site, type: :model do
def check!(content)
stub_page content
site.check
site.check!
end
def reference_and_check!(content)
@ -49,8 +49,8 @@ RSpec.describe Site, type: :model do
expect(site.changed_at).to be_nil
expect(site.content).to eq REFERENCE
expect(check.changed_at).to be_nil
expect(check.content).to eq REFERENCE_TARGET
# expect(check.changed_at).to be_nil
# expect(check.content).to eq REFERENCE_TARGET
end
it 'must change if change with no check' do
@ -69,8 +69,8 @@ RSpec.describe Site, type: :model do
expect(site.changed_at).to be_nil
expect(site.content).to be REFERENCE
expect(check.changed_at).to be_nil
expect(check.content).to eq REFERENCE_TARGET
# expect(check.changed_at).to be_nil
# expect(check.content).to eq REFERENCE_TARGET
end
it 'must change if check changed' do
@ -81,8 +81,8 @@ RSpec.describe Site, type: :model do
expect(site.changed_at).not_to be_nil
expect(site.content).to eq CHANGE_INSIDE_TARGET
expect(check.changed_at).not_to be_nil
expect(check.content).to eq CHANGE_TARGET
# expect(check.changed_at).not_to be_nil
# expect(check.content).to eq CHANGE_TARGET
end
it 'must stay changed if no change after a change' do

Loading…
Cancel
Save