From 7041e0ba1b9e9cbb0ea62ddb12c635bae81a4b7d Mon Sep 17 00:00:00 2001 From: Florian Scholz Date: Thu, 13 Apr 2023 16:14:01 +0200 Subject: [PATCH 0001/1022] Add links.{code,home} for MDN scrapers --- lib/docs/scrapers/http.rb | 9 +++++++++ lib/docs/scrapers/mdn/css.rb | 4 ++++ lib/docs/scrapers/mdn/dom.rb | 4 ++++ lib/docs/scrapers/mdn/html.rb | 4 ++++ lib/docs/scrapers/mdn/javascript.rb | 4 ++++ lib/docs/scrapers/mdn/mdn.rb | 4 ++++ lib/docs/scrapers/mdn/svg.rb | 4 ++++ lib/docs/scrapers/mdn/web_extensions.rb | 4 ++-- 8 files changed, 35 insertions(+), 2 deletions(-) diff --git a/lib/docs/scrapers/http.rb b/lib/docs/scrapers/http.rb index b8f382189d..a2f4a7f9b0 100644 --- a/lib/docs/scrapers/http.rb +++ b/lib/docs/scrapers/http.rb @@ -8,6 +8,15 @@ class Http < Mdn 'https://developer.mozilla.org/en-US/docs/Web/HTTP', 'https://datatracker.ietf.org/doc/html/', ] + self.links = { + home: 'https://developer.mozilla.org/en-US/docs/Web/HTTP', + code: 'https://github.com/mdn/content/tree/main/files/en-us/web/http' + } + + options[:attribution] = <<-HTML + © 2005–2023 MDN contributors.
+ Licensed under the Creative Commons Attribution-ShareAlike License v2.5 or later. + HTML html_filters.push 'http/clean_html', 'http/entries', 'title' diff --git a/lib/docs/scrapers/mdn/css.rb b/lib/docs/scrapers/mdn/css.rb index 724a6fec28..38ac2de681 100644 --- a/lib/docs/scrapers/mdn/css.rb +++ b/lib/docs/scrapers/mdn/css.rb @@ -4,6 +4,10 @@ class Css < Mdn self.name = 'CSS' self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/CSS' self.root_path = '/Reference' + self.links = { + home: 'https://developer.mozilla.org/en-US/docs/Web/CSS', + code: 'https://github.com/mdn/content/tree/main/files/en-us/web/css' + } html_filters.push 'css/clean_html', 'css/entries' diff --git a/lib/docs/scrapers/mdn/dom.rb b/lib/docs/scrapers/mdn/dom.rb index 7af13d559d..b9466c1e11 100644 --- a/lib/docs/scrapers/mdn/dom.rb +++ b/lib/docs/scrapers/mdn/dom.rb @@ -5,6 +5,10 @@ class Dom < Mdn self.name = 'Web APIs' self.slug = 'dom' self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/API' + self.links = { + home: 'https://developer.mozilla.org/en-US/docs/Web/API', + code: 'https://github.com/mdn/content/tree/main/files/en-us/web/api' + } html_filters.push 'dom/clean_html', 'dom/entries' diff --git a/lib/docs/scrapers/mdn/html.rb b/lib/docs/scrapers/mdn/html.rb index be7e7bae71..4857b70224 100644 --- a/lib/docs/scrapers/mdn/html.rb +++ b/lib/docs/scrapers/mdn/html.rb @@ -5,6 +5,10 @@ class Html < Mdn # release = '2023-01-06' self.name = 'HTML' self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/HTML' + self.links = { + home: 'https://developer.mozilla.org/en-US/docs/Web/HTML', + code: 'https://github.com/mdn/content/tree/main/files/en-us/web/html' + } html_filters.push 'html/clean_html', 'html/entries' diff --git a/lib/docs/scrapers/mdn/javascript.rb b/lib/docs/scrapers/mdn/javascript.rb index 8c1a64bc0a..b935ff9e9a 100644 --- a/lib/docs/scrapers/mdn/javascript.rb +++ b/lib/docs/scrapers/mdn/javascript.rb @@ -6,6 +6,10 @@ class Javascript < Mdn # release = '2023-01-06' self.name = 'JavaScript' self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference' + self.links = { + home: 'https://developer.mozilla.org/en-US/docs/Web/JavaScript', + code: 'https://github.com/mdn/content/tree/main/files/en-us/web/javascript' + } html_filters.push 'javascript/clean_html', 'javascript/entries' diff --git a/lib/docs/scrapers/mdn/mdn.rb b/lib/docs/scrapers/mdn/mdn.rb index 54856e475d..00c43c3f20 100644 --- a/lib/docs/scrapers/mdn/mdn.rb +++ b/lib/docs/scrapers/mdn/mdn.rb @@ -2,6 +2,10 @@ module Docs class Mdn < UrlScraper self.abstract = true self.type = 'mdn' + self.links = { + home: 'https://developer.mozilla.org', + code: 'https://github.com/mdn/content' + } html_filters.push 'mdn/clean_html', 'mdn/compat_tables' diff --git a/lib/docs/scrapers/mdn/svg.rb b/lib/docs/scrapers/mdn/svg.rb index 689fae99e7..3fc6711371 100644 --- a/lib/docs/scrapers/mdn/svg.rb +++ b/lib/docs/scrapers/mdn/svg.rb @@ -6,6 +6,10 @@ class Svg < Mdn # release = '2022-09-06' self.name = 'SVG' self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/SVG' + self.links = { + home: 'https://developer.mozilla.org/en-US/docs/Web/SVG', + code: 'https://github.com/mdn/content/tree/main/files/en-us/web/svg' + } html_filters.push 'svg/clean_html', 'svg/entries' diff --git a/lib/docs/scrapers/mdn/web_extensions.rb b/lib/docs/scrapers/mdn/web_extensions.rb index 664ca98552..ccbbe47bd6 100644 --- a/lib/docs/scrapers/mdn/web_extensions.rb +++ b/lib/docs/scrapers/mdn/web_extensions.rb @@ -4,9 +4,9 @@ class WebExtensions < Mdn self.name = 'Web Extensions' self.slug = 'web_extensions' self.links = { - home: 'https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions' + home: 'https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions', + code: 'https://github.com/mdn/content/tree/main/files/en-us/mozilla/add-ons/webextensions' } - self.base_url = 'https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions' html_filters.push 'web_extensions/entries', 'web_extensions/clean_html' From 46f2a8484be66c9e142f520f57979f1cac87cb5b Mon Sep 17 00:00:00 2001 From: Florian Scholz Date: Thu, 13 Apr 2023 17:40:18 +0200 Subject: [PATCH 0002/1022] Remove skip paths of non-existent MDN pages --- lib/docs/scrapers/mdn/html.rb | 6 ------ lib/docs/scrapers/mdn/javascript.rb | 7 ------- lib/docs/scrapers/mdn/svg.rb | 2 -- 3 files changed, 15 deletions(-) diff --git a/lib/docs/scrapers/mdn/html.rb b/lib/docs/scrapers/mdn/html.rb index be7e7bae71..3144231da9 100644 --- a/lib/docs/scrapers/mdn/html.rb +++ b/lib/docs/scrapers/mdn/html.rb @@ -10,12 +10,6 @@ class Html < Mdn options[:root_title] = 'HTML' - options[:skip] = %w( - /index - /Element/shadow - /Element/webkit-meter-optimum-value - ) - options[:replace_paths] = { '/Element/h1' => '/Element/Heading_Elements', '/Element/h2' => '/Element/Heading_Elements', diff --git a/lib/docs/scrapers/mdn/javascript.rb b/lib/docs/scrapers/mdn/javascript.rb index 8c1a64bc0a..60e7a1ed9a 100644 --- a/lib/docs/scrapers/mdn/javascript.rb +++ b/lib/docs/scrapers/mdn/javascript.rb @@ -11,13 +11,6 @@ class Javascript < Mdn options[:root_title] = 'JavaScript' - # Don't want - options[:skip] = %w( - /Methods_Index - /Properties_Index - /Operators/Legacy_generator_function - /Statements/Legacy_generator_function) - # Duplicates options[:skip].concat %w( /Global_Objects diff --git a/lib/docs/scrapers/mdn/svg.rb b/lib/docs/scrapers/mdn/svg.rb index 689fae99e7..934b640300 100644 --- a/lib/docs/scrapers/mdn/svg.rb +++ b/lib/docs/scrapers/mdn/svg.rb @@ -21,8 +21,6 @@ class Svg < Mdn end end - options[:skip] = %w(/Compatibility_sources /FAQ) - options[:fix_urls] = ->(url) do url.sub! 'https://developer.mozilla.org/en-US/Web/SVG', Svg.base_url url.sub! 'https://developer.mozilla.org/en-US/docs/SVG', Svg.base_url From 426be3138e359fc664974cc00bfea0a5cd839262 Mon Sep 17 00:00:00 2001 From: Florian Scholz Date: Thu, 13 Apr 2023 17:46:05 +0200 Subject: [PATCH 0003/1022] Fix --- lib/docs/scrapers/mdn/javascript.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/docs/scrapers/mdn/javascript.rb b/lib/docs/scrapers/mdn/javascript.rb index 60e7a1ed9a..1049d6d8a1 100644 --- a/lib/docs/scrapers/mdn/javascript.rb +++ b/lib/docs/scrapers/mdn/javascript.rb @@ -12,7 +12,7 @@ class Javascript < Mdn options[:root_title] = 'JavaScript' # Duplicates - options[:skip].concat %w( + options[:skip] = %w( /Global_Objects /Operators /Statements) From 8f0edbaaec5c1e38cfc97b96937c4567b72cc819 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 15 Apr 2023 00:34:42 +0000 Subject: [PATCH 0004/1022] chore(deps): update dependency sinatra-contrib to v3.0.6 --- Gemfile.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index be0265a8a7..0ffb5fa057 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -71,7 +71,7 @@ GEM pry (>= 0.13, < 0.15) racc (1.6.2) rack (2.2.6.4) - rack-protection (3.0.5) + rack-protection (3.0.6) rack rack-ssl-enforcer (0.2.9) rack-test (2.1.0) @@ -91,16 +91,16 @@ GEM sass-listen (4.0.0) rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) - sinatra (3.0.5) + sinatra (3.0.6) mustermann (~> 3.0) rack (~> 2.2, >= 2.2.4) - rack-protection (= 3.0.5) + rack-protection (= 3.0.6) tilt (~> 2.0) - sinatra-contrib (3.0.5) + sinatra-contrib (3.0.6) multi_json mustermann (~> 3.0) - rack-protection (= 3.0.5) - sinatra (= 3.0.5) + rack-protection (= 3.0.6) + sinatra (= 3.0.6) tilt (~> 2.0) sprockets (3.7.2) concurrent-ruby (~> 1.0) From ad56aa406b0df49861d58e2dcdcb0c25e6499752 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Mon, 24 Apr 2023 12:05:37 +0200 Subject: [PATCH 0005/1022] MDN scrapers: fix browser compatibility table --- lib/docs/filters/mdn/compat_tables.rb | 47 +++++++++------------------ lib/docs/scrapers/mdn/mdn.rb | 3 +- 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/lib/docs/filters/mdn/compat_tables.rb b/lib/docs/filters/mdn/compat_tables.rb index b9891db100..d51c8f2c81 100644 --- a/lib/docs/filters/mdn/compat_tables.rb +++ b/lib/docs/filters/mdn/compat_tables.rb @@ -71,27 +71,14 @@ def generate_compatibility_table() end def request_bcd_uris - url = current_url.to_s + '/index.json' - response = Request.run url - index_json = JSON.load response.body - - uris = [] - - index_json['doc']['body'].each do |element| - uris.push(element['value']['dataURL']) if element['type'] == 'browser_compatibility' and element['value']['dataURL'] - end - - uris.map! do |uri| - tmp_uri = URI.parse(base_url.to_s) - tmp_uri.path = uri - uri = tmp_uri.to_s - end - - return uris + hydration = JSON.load at_css('#hydration').text + files = hydration['doc']['browserCompat'] || [] + files.map { |file| "https://bcd.developer.mozilla.org/bcd/api/v0/current/#{file}.json" } end def generate_compatibility_table_wrapper(url) response = Request.run url + return "" unless response.success? @json_data = JSON.load(response.body)['data'] html_table = generate_basic_html_table() @@ -202,32 +189,30 @@ def add_data_to_entry(json, entry) if version_removed[0] format_string = "" + elsif version_added[0] == 'No' + format_string = "" + elsif version_added[0] == '?' + format_string = "" else - if version_added[0] == 'No' - format_string = "" - elsif version_added[0] == '?' - format_string = "" - else - format_string = "" - end + format_string = "" end for value in (0..version_added.length-1) do if version_removed[value] - format_string += "
#{version_added[value]}-#{version_removed[value]}
" + version_string = "#{version_added[value]}–#{version_removed[value]}" else - if version_added[value] == 'No' - format_string += "
#{version_added[value]}
" - else - format_string += "
#{version_added[value]}
" - end + version_string = version_added[value] end if notes[value] - format_string += "
#{notes[value]}
" + format_string += "
#{version_string}#{notes[value]}
" + else + format_string += "
#{version_string}
" end end + format_string += "" + else format_string = "
?
" end diff --git a/lib/docs/scrapers/mdn/mdn.rb b/lib/docs/scrapers/mdn/mdn.rb index 00c43c3f20..7ca01b050a 100644 --- a/lib/docs/scrapers/mdn/mdn.rb +++ b/lib/docs/scrapers/mdn/mdn.rb @@ -7,7 +7,8 @@ class Mdn < UrlScraper code: 'https://github.com/mdn/content' } - html_filters.push 'mdn/clean_html', 'mdn/compat_tables' + html_filters.insert_before 'container', 'mdn/compat_tables' # needs access to - - """ - source.replace / +\ +` + ); + return source.replace(/ \ -` +`, ); return source.replace(/