Browse Source

Working version with diffs

webui
aeris 2 years ago
parent
commit
ab4295d492
9 changed files with 178 additions and 44 deletions
  1. +47
    -0
      app/lib/html.rb
  2. +25
    -26
      app/lib/http.rb
  3. +3
    -0
      app/models/diff.rb
  4. +2
    -2
      app/models/group.rb
  5. +2
    -2
      app/models/site.rb
  6. +5
    -1
      app/models/target.rb
  7. +22
    -12
      bin/cli.rb
  8. +1
    -1
      config/initializers/fixture.rb
  9. +71
    -0
      db/schema.rb

+ 47
- 0
app/lib/html.rb View File

@@ -0,0 +1,47 @@
# Wraps a raw HTML payload: re-tags the string with the charset the document
# declares for itself and exposes a few parsed accessors (title, charset).
class Html
  # Captures the charset parameter of a `<meta http-equiv="Content-Type">` value.
  # The capture stops at quotes, whitespace or `;` so that trailing parameters
  # are not swallowed into the charset name.
  CONTENT_TYPE_CHARSET = /text\/html;\s*charset=["']?([^\s;"']+)/i.freeze

  # @param content [String] raw HTML body. Unless it is frozen, the string is
  #   re-tagged in place (as before); a frozen string is copied first.
  def initialize(content)
    # force_encoding mutates its receiver, which would raise on a frozen string.
    @content = content.frozen? ? content.dup : content

    encoding = resolve_encoding self.charset
    return if encoding.nil? || encoding == @content.encoding

    @content.force_encoding encoding
    # The document memoized by #charset was built from the string before it was
    # re-tagged; drop it so later #parse / #title calls see the new encoding.
    @parse = nil
  end

  # @return [String, nil] text of the `head title` element, nil when absent
  def title
    self.parse&.at('head title')&.text
  end

  # @return [String] the (possibly re-tagged) HTML content
  def to_s
    @content
  end

  # Shortcut for `Html.new(content).to_s`.
  #
  # @param content [String] raw HTML body
  # @return [String] the content after charset detection
  def self.to_s(content)
    self.new(content).to_s
  end

  # @return [Object] the memoized result of `Nokogiri::HTML.parse` on the content
  def parse
    @parse ||= Nokogiri::HTML.parse @content
  end

  # Looks up the charset declared inside the document itself.
  #
  # The Content-Type response header is deliberately not consulted here: its
  # charset seems to be already processed by HTTParty before the body gets here.
  #
  # @return [String, nil] the declared charset name, nil when none is declared
  def charset
    html = self.parse

    # HTML5 form: <meta charset="...">
    meta = html.at 'head meta[charset]'
    return meta['charset'] if meta

    # Legacy form: <meta http-equiv="Content-Type" content="text/html; charset=...">
    meta = html.at 'head meta[http-equiv="Content-Type"]'
    return nil unless meta

    match = CONTENT_TYPE_CHARSET.match meta['content'].to_s
    match && match[1]
  end

  private

  # Maps a declared charset name to an Encoding.
  # The name comes from the page itself, so unknown names are ignored instead
  # of letting the ArgumentError from Encoding.find escape.
  #
  # @param name [String, nil] charset name as declared by the document
  # @return [Encoding, nil] nil when the name is missing or unknown
  def resolve_encoding(name)
    return nil if name.nil?

    Encoding.find name.strip
  rescue ArgumentError
    nil
  end
end

+ 25
- 26
app/lib/http.rb View File

@@ -1,3 +1,5 @@
require 'xz'

class Http
def initialize(url)
@url = url
@@ -17,34 +19,14 @@ class Http
end

def title
html = self.parse
html = self.html
return nil unless html
tag = html.at 'head title'
tag = html.parse.at 'head title'
tag&.text
end

def charset
return nil unless self.html?

body = @response.body
html = Nokogiri::HTML.parse body

# Content-Type charset seems already processed by HTTParty
# charset = @response.headers['content-type']
# charset = /text\/html;\s*charset=(.*)/i.match charset
# return charset[1] if charset

charset = html.at 'head meta[charset]'
return charset['charset'] if charset

charset = html.at 'head meta[http-equiv="Content-Type"]'
if charset
charset = charset['content']
charset = /text\/html;\s*charset=(.*)/i.match charset
return charset[1] if charset
end

nil
self.html&.charset
end

def body
@@ -57,18 +39,24 @@ class Http

protected

# Memoized Html wrapper around the response body.
# Evaluates to nil (and builds nothing) whenever `html?` is false.
def html
  @html ||= Html.new(@response.body) if self.html?
end

DATE_FORMAT = '%Y%m%d_%H%M%S'.freeze

# Hex-encoded SHA-256 digest of +url+.
#
# @param url [String]
# @return [String] 64 lowercase hexadecimal characters
def self.prefix(url)
  Digest::SHA256.new.update(url).hexdigest
end

HTTP_CACHE_DIR = File.join Rails.root, 'tmp', 'cache', 'http'
FileUtils.mkdir_p HTTP_CACHE_DIR unless Dir.exist? HTTP_CACHE_DIR

def cache(response)
return unless ENV['DEBUG_HTTP']

prefix = self.class.prefix @url
dir = File.join Rails.root, 'tmp', 'cache', 'http'
FileUtils.mkdir_p dir unless Dir.exist? dir

body = response.body
last = Dir[File.join dir, "#{prefix}_*"].sort.last
@@ -79,11 +67,22 @@ class Http
end

time = Time.now.strftime DATE_FORMAT
file = prefix + '_' + time
file = prefix + '_' + time + '.xz'
file = File.join dir, file
body = XZ.compress body, level: 9
File.binwrite file, body
end

# Reads +file+ as binary and returns the result of XZ-decompressing its bytes.
#
# @param file [String] path of the file to read
# @return the value returned by `XZ.decompress`
def self.cache(file)
  XZ.decompress File.binread(file)
end

# Lists the `.xz` files in HTTP_CACHE_DIR whose name starts with the prefix
# computed for +url+. No ordering is applied here.
#
# @param url [String]
# @return [Array<String>] matching file paths
def self.caches(url)
  fingerprint = self.prefix(url)
  Dir.glob "#{HTTP_CACHE_DIR}/#{fingerprint}_*.xz"
end

def grab
response = HTTParty.get @url, timeout: 10.seconds
raise "Receive #{response.code}" unless response.success?


+ 3
- 0
app/models/diff.rb View File

@@ -0,0 +1,3 @@
# Active Record model for a diff; every diff is associated with one site.
class Diff < ApplicationRecord
  belongs_to(:site)
end

+ 2
- 2
app/models/group.rb View File

@@ -1,7 +1,7 @@
class Group < ApplicationRecord
belongs_to :template, optional: true
has_many :sites
has_many :targets
has_many :sites, dependent: :delete_all
has_many :targets, dependent: :delete_all

validates :name, uniqueness: true



+ 2
- 2
app/models/site.rb View File

@@ -1,8 +1,8 @@
class Site < ApplicationRecord
belongs_to :group, optional: true
belongs_to :template, optional: true
has_many :targets
has_many :diffs
has_many :targets, dependent: :delete_all
has_many :diffs, dependent: :delete_all

validates :url, presence: true



+ 5
- 1
app/models/target.rb View File

@@ -2,7 +2,6 @@ class Target < ApplicationRecord
has_many :templates
has_many :groups
has_many :sites
has_many :checks

def to_s
return self.name if self.name
@@ -23,6 +22,11 @@ class Target < ApplicationRecord
}.compact
end

# Builds a new (unsaved) instance from a plain attribute hash.
#
# Keys may be strings or symbols. The hash passed in is left untouched: a
# symbolized copy is used instead of symbolizing the caller's hash in place.
#
# @param hash [Hash] attributes; only :name, :from, :to and :css are read
# @return a new instance populated with those four attributes
def self.from_h(hash)
  attrs = hash.symbolize_keys
  # :to must come from the :to key (it was previously filled from :from).
  self.new name: attrs[:name], from: attrs[:from], to: attrs[:to], css: attrs[:css]
end

def extract_boundary(content)
return nil unless content
if self.from


+ 22
- 12
bin/cli.rb View File

@@ -63,27 +63,37 @@ class App < Thor
desc 'redo <url>*', 'Redo diff from cache'

def redo(urls = nil)
cache = 'tmp/cache/http'
results = Hash.new 0

self.process urls do |site|
site._changes.delete_all
puts site.url.colorize :yellow

site.diffs.delete_all
reference = nil
fp = Http.prefix site.url
Dir["#{cache}/#{fp}_*"].sort.each do |file|
Http.caches(site.url).sort.each do |file|
name = File.basename file
date = name.split('_', 2).last
date = DateTime.strptime date, Http::DATE_FORMAT
content = File.read file

unless reference
site.update! reference: content
else
status = site.diff! reference, content, date: date
ap site: site.url, date: date, status: status
end
content = Html.to_s Http.cache file

status = unless reference
site.update! reference: content
:reference
else
site.diff! reference, content, date: date
end
results[status] += 1
color = COLORS[status]
puts " #{date}: #{status.to_s.colorize color}"
reference = content
end
nil
end

results.each do |k, v|
color = COLORS[k]
puts "#{k.to_s.colorize color}: #{v}"
end
end

# desc 'redo <url> <date1> <date2>', 'Redo check from cache'


+ 1
- 1
config/initializers/fixture.rb View File

@@ -9,7 +9,7 @@ module Diffy
self
end

def self.dump(diff, options = {})
def self.load(diff, options = {})
self.new(nil, nil, options).load diff
end



+ 71
- 0
db/schema.rb View File

@@ -0,0 +1,71 @@
# This file is auto-generated from the current state of the database. Instead
# of editing this file, please use the migrations feature of Active Record to
# incrementally modify your database, and then regenerate this schema definition.
#
# Note that this schema.rb definition is the authoritative source for your
# database schema. If you need to create the application database on another
# system, you should be using db:schema:load, not running all the migrations
# from scratch. The latter is a flawed and unsustainable approach (the more migrations
# you'll amass, the slower it'll run and the greater likelihood for issues).
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2018_11_27_204747) do

# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"

create_table "diffs", force: :cascade do |t|
t.json "content", null: false
t.bigint "site_id", null: false
t.datetime "created_at", null: false
t.index ["site_id"], name: "index_diffs_on_site_id"
end

create_table "groups", force: :cascade do |t|
t.string "name", null: false
t.bigint "template_id"
t.index ["name"], name: "index_groups_on_name", unique: true
t.index ["template_id"], name: "index_groups_on_template_id"
end

create_table "sites", force: :cascade do |t|
t.string "url", null: false
t.string "name"
t.text "reference"
t.bigint "group_id"
t.bigint "template_id"
t.string "last_error"
t.datetime "checked_at"
t.datetime "changed_at"
t.index ["group_id"], name: "index_sites_on_group_id"
t.index ["name"], name: "index_sites_on_name"
t.index ["template_id"], name: "index_sites_on_template_id"
end

create_table "targets", force: :cascade do |t|
t.string "name"
t.string "css"
t.string "from"
t.string "to"
t.bigint "template_id"
t.bigint "group_id"
t.bigint "site_id"
t.index ["group_id"], name: "index_targets_on_group_id"
t.index ["site_id"], name: "index_targets_on_site_id"
t.index ["template_id"], name: "index_targets_on_template_id"
end

create_table "templates", force: :cascade do |t|
t.string "name"
t.index ["name"], name: "index_templates_on_name", unique: true
end

add_foreign_key "diffs", "sites"
add_foreign_key "groups", "templates"
add_foreign_key "sites", "groups"
add_foreign_key "sites", "templates"
add_foreign_key "targets", "groups"
add_foreign_key "targets", "sites"
add_foreign_key "targets", "templates"
end

Loading…
Cancel
Save