Browse Source

Working version with diffs

webui
aeris 4 years ago
parent
commit
ab4295d492
  1. 47
      app/lib/html.rb
  2. 51
      app/lib/http.rb
  3. 3
      app/models/diff.rb
  4. 4
      app/models/group.rb
  5. 4
      app/models/site.rb
  6. 6
      app/models/target.rb
  7. 34
      bin/cli.rb
  8. 2
      config/initializers/fixture.rb
  9. 71
      db/schema.rb

47
app/lib/html.rb

@ -0,0 +1,47 @@
# Wraps a raw HTML string: looks up the charset the document declares in its
# <head>, re-tags the string with that encoding and exposes the page title.
class Html
  # Extracts the charset parameter from a Content-Type meta value such as
  # "text/html; charset=utf-8". The capture stops at whitespace, quotes or ";"
  # so trailing parameters never end up inside the charset name.
  CONTENT_TYPE_CHARSET = %r{text/html;\s*charset=["']?([^\s;"']+)}i.freeze

  # @param content [String] raw HTML. The string is re-tagged in place (bytes
  #   are not transcoded) when the declared charset names an encoding Ruby
  #   knows; otherwise it is left exactly as received.
  def initialize(content)
    @content = content
    encoding = known_encoding self.charset
    return if encoding.nil? || encoding == @content.encoding

    @content.force_encoding encoding
    # The document memoized by #charset was built before the string was
    # re-tagged; drop it so later calls parse the re-tagged content.
    @parse = nil
  end

  # @return [String, nil] text of the <title> element inside <head>, if any.
  def title
    html = self.parse
    return nil unless html

    html.at('head title')&.text
  end

  # @return [String] the wrapped content (same object that was passed in).
  def to_s
    @content
  end

  # Shortcut for `Html.new(content).to_s`.
  def self.to_s(content)
    self.new(content).to_s
  end

  # @return the Nokogiri document for the content, parsed once and memoized.
  def parse
    @parse ||= Nokogiri::HTML.parse @content
  end

  # Charset declared by the document itself.
  #
  # The HTTP Content-Type header is deliberately not consulted here: its
  # charset seems to be already processed by HTTParty.
  #
  # @return [String, nil] value of `<meta charset>` when present and not
  #   blank, else the charset parameter of `<meta http-equiv="Content-Type">`,
  #   else nil. The name is returned as declared, even if Ruby does not know it.
  def charset
    html = self.parse

    meta = html.at 'head meta[charset]'
    if meta
      declared = meta['charset'].to_s.strip
      return declared unless declared.empty?
    end

    meta = html.at 'head meta[http-equiv="Content-Type"]'
    return nil unless meta

    match = CONTENT_TYPE_CHARSET.match meta['content'].to_s
    match && match[1]
  end

  private

  # Resolves a declared charset name to an Encoding.
  # Returns nil for a missing, blank or unknown name instead of raising, so a
  # page declaring a bogus charset cannot make the constructor fail.
  def known_encoding(name)
    return nil if name.nil? || name.empty?

    Encoding.find name
  rescue ArgumentError
    nil
  end
end

51
app/lib/http.rb

@ -1,3 +1,5 @@
require 'xz'
class Http
def initialize(url)
@url = url
@ -17,34 +19,14 @@ class Http
end
def title
html = self.parse
html = self.html
return nil unless html
tag = html.at 'head title'
tag = html.parse.at 'head title'
tag&.text
end
def charset
return nil unless self.html?
body = @response.body
html = Nokogiri::HTML.parse body
# Content-Type charset seems already processed by HTTParty
# charset = @response.headers['content-type']
# charset = /text\/html;\s*charset=(.*)/i.match charset
# return charset[1] if charset
charset = html.at 'head meta[charset]'
return charset['charset'] if charset
charset = html.at 'head meta[http-equiv="Content-Type"]'
if charset
charset = charset['content']
charset = /text\/html;\s*charset=(.*)/i.match charset
return charset[1] if charset
end
nil
self.html&.charset
end
def body
@ -57,18 +39,24 @@ class Http
protected
# Html wrapper around the response body.
# Yields nil whenever html? is false; otherwise the wrapper is built on first
# use and the same instance is handed back afterwards.
def html
  if self.html?
    @html ||= Html.new(@response.body)
  end
end
DATE_FORMAT = '%Y%m%d_%H%M%S'.freeze
# Cache-file prefix for a URL: the hex-encoded SHA-256 digest of +url+.
def self.prefix(url)
  Digest::SHA256.new.update(url).hexdigest
end
HTTP_CACHE_DIR = File.join Rails.root, 'tmp', 'cache', 'http'
FileUtils.mkdir_p HTTP_CACHE_DIR unless Dir.exist? HTTP_CACHE_DIR
def cache(response)
return unless ENV['DEBUG_HTTP']
prefix = self.class.prefix @url
dir = File.join Rails.root, 'tmp', 'cache', 'http'
FileUtils.mkdir_p dir unless Dir.exist? dir
body = response.body
last = Dir[File.join dir, "#{prefix}_*"].sort.last
@ -79,11 +67,22 @@ class Http
end
time = Time.now.strftime DATE_FORMAT
file = prefix + '_' + time
file = prefix + '_' + time + '.xz'
file = File.join dir, file
body = XZ.compress body, level: 9
File.binwrite file, body
end
# Loads one cached body from disk: +file+ is read in binary mode and its
# bytes are handed to XZ.decompress, whose result is returned.
def self.cache(file)
  XZ.decompress File.binread(file)
end
# Paths of every xz-compressed cache file stored for +url+, i.e. the files
# named "<prefix>_*.xz" directly under HTTP_CACHE_DIR. No ordering is applied
# here.
def self.caches(url)
  pattern = "#{HTTP_CACHE_DIR}/#{self.prefix(url)}_*.xz"
  Dir.glob pattern
end
def grab
response = HTTParty.get @url, timeout: 10.seconds
raise "Receive #{response.code}" unless response.success?

3
app/models/diff.rb

@ -0,0 +1,3 @@
# Active Record model for a diff; every diff is attached to one Site.
class Diff < ApplicationRecord
  belongs_to(:site)
end

4
app/models/group.rb

@ -1,7 +1,7 @@
class Group < ApplicationRecord
belongs_to :template, optional: true
has_many :sites
has_many :targets
has_many :sites, dependent: :delete_all
has_many :targets, dependent: :delete_all
validates :name, uniqueness: true

4
app/models/site.rb

@ -1,8 +1,8 @@
class Site < ApplicationRecord
belongs_to :group, optional: true
belongs_to :template, optional: true
has_many :targets
has_many :diffs
has_many :targets, dependent: :delete_all
has_many :diffs, dependent: :delete_all
validates :url, presence: true

6
app/models/target.rb

@ -2,7 +2,6 @@ class Target < ApplicationRecord
has_many :templates
has_many :groups
has_many :sites
has_many :checks
def to_s
return self.name if self.name
@ -23,6 +22,11 @@ class Target < ApplicationRecord
}.compact
end
# Instantiates a Target (via new) from a hash.
# Accepts string or symbol keys and reads :name, :from, :to and :css.
def self.from_h(hash)
  # Work on a symbolized copy so the caller's hash is left untouched.
  attrs = hash.symbolize_keys
  # :to must come from the hash's own :to entry; it used to be filled with
  # hash[:from], which discarded the end boundary.
  self.new name: attrs[:name], from: attrs[:from], to: attrs[:to], css: attrs[:css]
end
def extract_boundary(content)
return nil unless content
if self.from

34
bin/cli.rb

@ -63,27 +63,37 @@ class App < Thor
desc 'redo <url>*', 'Redo diff from cache'
def redo(urls = nil)
cache = 'tmp/cache/http'
results = Hash.new 0
self.process urls do |site|
site._changes.delete_all
puts site.url.colorize :yellow
site.diffs.delete_all
reference = nil
fp = Http.prefix site.url
Dir["#{cache}/#{fp}_*"].sort.each do |file|
Http.caches(site.url).sort.each do |file|
name = File.basename file
date = name.split('_', 2).last
date = DateTime.strptime date, Http::DATE_FORMAT
content = File.read file
unless reference
site.update! reference: content
else
status = site.diff! reference, content, date: date
ap site: site.url, date: date, status: status
end
content = Html.to_s Http.cache file
status = unless reference
site.update! reference: content
:reference
else
site.diff! reference, content, date: date
end
results[status] += 1
color = COLORS[status]
puts " #{date}: #{status.to_s.colorize color}"
reference = content
end
nil
end
results.each do |k, v|
color = COLORS[k]
puts "#{k.to_s.colorize color}: #{v}"
end
end
# desc 'redo <url> <date1> <date2>', 'Redo check from cache'

2
config/initializers/fixture.rb

@ -9,7 +9,7 @@ module Diffy
self
end
def self.dump(diff, options = {})
def self.load(diff, options = {})
self.new(nil, nil, options).load diff
end

71
db/schema.rb

@ -0,0 +1,71 @@
# This file is auto-generated from the current state of the database. Instead
# of editing this file, please use the migrations feature of Active Record to
# incrementally modify your database, and then regenerate this schema definition.
#
# Note that this schema.rb definition is the authoritative source for your
# database schema. If you need to create the application database on another
# system, you should be using db:schema:load, not running all the migrations
# from scratch. The latter is a flawed and unsustainable approach (the more migrations
# you'll amass, the slower it'll run and the greater likelihood for issues).
#
# It's strongly recommended that you check this file into your version control system.
# Schema snapshot at version 2018_11_27_204747: five tables (diffs, groups,
# sites, targets, templates) followed by the foreign keys that link them.
ActiveRecord::Schema.define(version: 2018_11_27_204747) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
# diffs: a mandatory JSON payload, the owning site and a creation timestamp.
create_table "diffs", force: :cascade do |t|
t.json "content", null: false
t.bigint "site_id", null: false
t.datetime "created_at", null: false
t.index ["site_id"], name: "index_diffs_on_site_id"
end
# groups: uniquely named, with an optional template reference.
create_table "groups", force: :cascade do |t|
t.string "name", null: false
t.bigint "template_id"
t.index ["name"], name: "index_groups_on_name", unique: true
t.index ["template_id"], name: "index_groups_on_template_id"
end
# sites: only the URL is mandatory; group and template references are nullable.
create_table "sites", force: :cascade do |t|
t.string "url", null: false
t.string "name"
t.text "reference"
t.bigint "group_id"
t.bigint "template_id"
t.string "last_error"
t.datetime "checked_at"
t.datetime "changed_at"
t.index ["group_id"], name: "index_sites_on_group_id"
t.index ["name"], name: "index_sites_on_name"
t.index ["template_id"], name: "index_sites_on_template_id"
end
# targets: every column is nullable; a row may reference a template, a group
# and/or a site.
create_table "targets", force: :cascade do |t|
t.string "name"
t.string "css"
t.string "from"
t.string "to"
t.bigint "template_id"
t.bigint "group_id"
t.bigint "site_id"
t.index ["group_id"], name: "index_targets_on_group_id"
t.index ["site_id"], name: "index_targets_on_site_id"
t.index ["template_id"], name: "index_targets_on_template_id"
end
# templates: a name with a unique index.
create_table "templates", force: :cascade do |t|
t.string "name"
t.index ["name"], name: "index_templates_on_name", unique: true
end
# Foreign keys backing the *_id columns declared above.
add_foreign_key "diffs", "sites"
add_foreign_key "groups", "templates"
add_foreign_key "sites", "groups"
add_foreign_key "sites", "templates"
add_foreign_key "targets", "groups"
add_foreign_key "targets", "sites"
add_foreign_key "targets", "templates"
end
Loading…
Cancel
Save