parent
39bef45c92
commit
99b3f29b41
@ -0,0 +1,13 @@ |
||||
# Paths watched by the :rails guard: the bundle lockfile and everything
# under config/ or lib/.
guard :rails do
  rails_triggers = ['Gemfile.lock', %r{^(config|lib)/.*}]
  rails_triggers.each { |pattern| watch(pattern) }
end
||||
|
||||
# Paths watched by the :livereload guard.
guard :livereload do
  # Views, helpers, static public files and locale files are watched as-is.
  [
    %r{app/views/.+\.(erb|haml|slim)$},
    %r{app/helpers/.+\.rb},
    %r{public/.+\.(css|js|html)},
    %r{config/locales/.+\.yml}
  ].each { |pattern| watch(pattern) }

  # Rails Assets Pipeline
  # Capture group 3 is the file path below the asset-type directory; the
  # block maps the changed source file to its path under /assets/.
  watch(%r{(app|vendor)(/assets/\w+/(.+\.(css|js|html|png|jpg|coffee|scss))).*}) do |match|
    "/assets/#{match[3]}"
  end
end
@ -0,0 +1,71 @@ |
||||
# Fetches a URL once, at construction time, and exposes the response status,
# a couple of HTML details (title, declared charset) and the body as UTF-8.
class Http
  # Request timeout, in seconds, handed to HTTParty.
  TIMEOUT = 10

  # Extracts the charset from the content value of a
  # `<meta http-equiv="Content-Type">` tag. The capture stops at whitespace,
  # `;` or a quote so padding and trailing parameters never leak into the
  # charset name.
  META_CONTENT_TYPE = %r{text/html;\s*charset=["']?([^\s;"']+)}i.freeze

  # @param url [String] address to fetch
  # @raise [RuntimeError] when the response is not a success
  def initialize(url)
    @url = url
    @response = grab
  end

  # @return the status code reported by the response
  def code
    @response.code
  end

  # @return [Boolean] whether the response reports a success
  def success?
    @response.success?
  end

  # @return [Boolean] whether the response content type is exactly text/html
  def html?
    @response.content_type == 'text/html'
  end

  # @return [String, nil] text of the `<title>` tag found in `<head>`, or nil
  #   when the response is not HTML or has no title
  def title
    parse&.at('head title')&.text
  end

  # Charset declared inside the HTML document itself.
  #
  # The charset of the Content-Type header is deliberately not consulted:
  # HTTParty appears to apply it to the body already.
  #
  # @return [String, nil] the declared charset name, or nil when the response
  #   is not HTML or declares none
  def charset
    html = parse
    return nil unless html

    meta = html.at 'head meta[charset]'
    return meta['charset'] if meta

    # The http-equiv value is compared case-insensitively, so both
    # "Content-Type" and "content-type" are recognised.
    meta = html.css('head meta[http-equiv]').find do |node|
      node['http-equiv'].to_s.downcase == 'content-type'
    end
    return nil unless meta

    found = META_CONTENT_TYPE.match meta['content'].to_s
    found && found[1]
  end

  # Response body transcoded to UTF-8. The result is computed once and
  # reused, so repeated calls return the same text.
  #
  # @return [String] the body, tagged and encoded as UTF-8
  def body
    @body ||= utf8_body
  end

  protected

  # Performs the GET request.
  #
  # @return the HTTParty response
  # @raise [RuntimeError] when the response is not a success
  def grab
    response = HTTParty.get @url, timeout: TIMEOUT
    raise "Receive #{response.code}" unless response.success?

    response
  end

  # @return the parsed HTML document (parsed once, then reused), or nil when
  #   the response is not HTML
  def parse
    return nil unless html?

    @document ||= Nokogiri::HTML.parse @response.body
  end

  private

  # Builds the UTF-8 body from a copy of the response body. Working on a copy
  # matters: re-tagging or transcoding the response's own string in place
  # would make a later call transcode already-converted bytes a second time.
  def utf8_body
    text = @response.body.to_s.dup
    declared = lookup_encoding charset

    if declared
      text.force_encoding declared
    elsif text.encoding == Encoding::BINARY
      # No usable charset and no encoding tag: treat the bytes as UTF-8
      # rather than failing to transcode from binary.
      text.force_encoding Encoding::UTF_8
    end

    text.encode! Encoding::UTF_8 unless text.encoding == Encoding::UTF_8
    text
  end

  # Resolves a charset name to an Encoding.
  #
  # @param name [String, nil] charset name as declared by the page
  # @return [Encoding, nil] nil when the name is missing or unknown to Ruby
  def lookup_encoding(name)
    return nil if name.nil? || name.empty?

    Encoding.find name
  rescue ArgumentError
    nil
  end
end
@ -0,0 +1,78 @@ |
||||
# Checks that Http#body always comes back as UTF-8, whatever charset the
# server or the document declares. Requests are stubbed with WebMock.
RSpec.describe Http do
  let :site do
    Http.new 'http://localhost/'
  end

  # Stubs every request to localhost with a 200 response carrying the given
  # payload and Content-Type header.
  def stub_site(payload, content_type)
    stub_request(:any, 'localhost').to_return body: payload, status: 200,
                                              headers: { 'Content-Type': content_type }
  end

  # Fetches the body through Http, checks it is tagged as UTF-8 and returns
  # the text content of its <body> element.
  def fetched_body_text
    fetched = site.body
    expect(fetched.encoding).to eq Encoding::UTF_8
    Nokogiri::HTML.parse(fetched).at('body').content
  end

  it 'must encode to utf-8 if not HTML' do
    stub_site "\xC3\xA9", 'application/pdf'
    expect(site.body.encoding).to eq Encoding::UTF_8
    expect(site.body).to eq "\xC3\xA9"
  end

  it 'must encode to utf-8 if HTML and nothing specified' do
    stub_site <<~HEREDOC, 'text/html'
      <html>
      <body>\xC3\xA9</body>
      </html>
    HEREDOC
    expect(fetched_body_text).to eq "\xC3\xA9"
  end

  it 'must encode to given content-type charset if nothing specified' do
    stub_site <<~HEREDOC, 'text/html; charset=iso-8859-1'
      <html>
      <body>\xE9</body>
      </html>
    HEREDOC
    expect(fetched_body_text).to eq "\xC3\xA9"
  end

  it 'must encode to given meta charset' do
    stub_site <<~HEREDOC, 'text/html'
      <html>
      <head>
      <meta charset="ISO-8859-1"/>
      </head>
      <body>\xE9</body>
      </html>
    HEREDOC
    expect(fetched_body_text).to eq "\xC3\xA9"
  end

  it 'must encode to given meta http-equiv' do
    stub_site <<~HEREDOC, 'text/html'
      <html>
      <head>
      <meta http-equiv="Content-Type" content="text/html; CHARSET=iso-8859-1">
      </head>
      <body>\xE9</body>
      </html>
    HEREDOC
    expect(fetched_body_text).to eq "\xC3\xA9"
  end
end
Loading…
Reference in new issue