From 58ffb4057fcd67e5c0ba33f76344cc8ad927c72b Mon Sep 17 00:00:00 2001 From: Samantha McVey Date: Wed, 9 Nov 2016 20:24:10 -0800 Subject: Add UTF-8 check test --- tests/encoding.rb | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tests/encoding.rb (limited to 'tests/encoding.rb') diff --git a/tests/encoding.rb b/tests/encoding.rb new file mode 100644 index 00000000..c4d41d19 --- /dev/null +++ b/tests/encoding.rb @@ -0,0 +1,25 @@ +#!/usr/bin/env ruby +$file_count = 0; +markdown_files = Dir["./**/*.html.markdown"] +markdown_files.each do |file| + begin + file_bin = File.open(file, "rb") + contents = file_bin.read + if ! contents.valid_encoding? + puts "#{file} has an invalid encoding! Please save the file in UTF-8!" + else + $file_count = $file_count + 1 + end + rescue Exception => msg + puts msg + end +end +files_failed = markdown_files.length - $file_count +if files_failed != 0 + puts "FAILURE!!! #{files_failed} files were unable to be validated as UTF-8!" + puts "Please resave the file as UTF-8." + exit 1 +else + puts "Success. All #{$file_count} files passed UTF-8 validity checks" + exit 0 +end -- cgit v1.2.3 From 70d6977ccc07b667da0ed165b7706afbb5190816 Mon Sep 17 00:00:00 2001 From: Samantha McVey Date: Wed, 9 Nov 2016 20:57:13 -0800 Subject: Use charlock_holmes to do encoding detection. In my tests it has properly identified incorrect encodings that used to be present on older commits. This will help ensure this won't happen again, giving people instant feedback and allowing all pull requests to be checked --- tests/encoding.rb | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'tests/encoding.rb') diff --git a/tests/encoding.rb b/tests/encoding.rb index c4d41d19..ae7e495f 100644 --- a/tests/encoding.rb +++ b/tests/encoding.rb @@ -1,14 +1,18 @@ #!/usr/bin/env ruby +require 'charlock_holmes' $file_count = 0; markdown_files = Dir["./**/*.html.markdown"] markdown_files.each do |file| begin - file_bin = File.open(file, "rb") - contents = file_bin.read - if ! contents.valid_encoding? - puts "#{file} has an invalid encoding! Please save the file in UTF-8!" - else + contents = File.read(file) + detection = CharlockHolmes::EncodingDetector.detect(contents) + case detection[:encoding] + when 'UTF-8' + $file_count = $file_count + 1 + when 'ISO-8859-1' $file_count = $file_count + 1 + else + puts "#{file} was detected as #{detection[:encoding]} encoding! Please save the file in UTF-8!" end rescue Exception => msg puts msg @@ -20,6 +24,6 @@ if files_failed != 0 puts "Please resave the file as UTF-8." exit 1 else - puts "Success. All #{$file_count} files passed UTF-8 validity checks" + puts "Success. All #{$file_count} files Ruby's UTF-8 validity checks. This won't catch most problems." exit 0 end -- cgit v1.2.3 From 03980b63c7f7fa5bfd6a74e23613bcf82392105a Mon Sep 17 00:00:00 2001 From: Adam Date: Wed, 9 Nov 2016 22:10:35 -0800 Subject: Fix encoding test, then make tests actually fail. --- tests/encoding.rb | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tests/encoding.rb') diff --git a/tests/encoding.rb b/tests/encoding.rb index ae7e495f..e8e18e4d 100644 --- a/tests/encoding.rb +++ b/tests/encoding.rb @@ -9,16 +9,21 @@ markdown_files.each do |file| case detection[:encoding] when 'UTF-8' $file_count = $file_count + 1 - when 'ISO-8859-1' - $file_count = $file_count + 1 else - puts "#{file} was detected as #{detection[:encoding]} encoding! Please save the file in UTF-8!" + enc = detection[:encoding] + if not enc.start_with? 'ISO-8859-' + puts "#{file} was detected as #{detection[:encoding]} encoding! Please save the file in UTF-8!" + else + $file_count += 1 + end end rescue Exception => msg puts msg end end + files_failed = markdown_files.length - $file_count + if files_failed != 0 puts "FAILURE!!! #{files_failed} files were unable to be validated as UTF-8!" puts "Please resave the file as UTF-8." -- cgit v1.2.3 From bb5e514321732ee6fac150cdbfb587cca5c20433 Mon Sep 17 00:00:00 2001 From: Samantha McVey Date: Wed, 9 Nov 2016 22:15:03 -0800 Subject: Update rakefile so that it will return an exit code of 1 if there is a failure (Travis CI needs this). Make ISO-8859* encoding detections notices not failures --- tests/encoding.rb | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'tests/encoding.rb') diff --git a/tests/encoding.rb b/tests/encoding.rb index e8e18e4d..a0b3b184 100644 --- a/tests/encoding.rb +++ b/tests/encoding.rb @@ -9,26 +9,24 @@ markdown_files.each do |file| case detection[:encoding] when 'UTF-8' $file_count = $file_count + 1 + when 'ISO-8859-1' + $file_count = $file_count + 1 + when /ISO-8859/ + puts "Notice: #{file} was detected as #{detection[:encoding]} encoding. Everything is probably fine." + $file_count = $file_count + 1 else - enc = detection[:encoding] - if not enc.start_with? 'ISO-8859-' - puts "#{file} was detected as #{detection[:encoding]} encoding! Please save the file in UTF-8!" - else - $file_count += 1 - end + puts "WARNING #{file} was detected as #{detection[:encoding]} encoding. Please save the file in UTF-8!" end rescue Exception => msg puts msg end end - files_failed = markdown_files.length - $file_count - if files_failed != 0 puts "FAILURE!!! #{files_failed} files were unable to be validated as UTF-8!" puts "Please resave the file as UTF-8." exit 1 else - puts "Success. All #{$file_count} files Ruby's UTF-8 validity checks. This won't catch most problems." + puts "Success. All #{$file_count} files passed UTF-8 validity checks." exit 0 end -- cgit v1.2.3