Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(519)

Side by Side Diff: components/dom_distiller/content/renderer/distillability_agent.cc

Issue 2961533002: Add "AllArticles" mode to Reader Mode heuristics (Closed)
Patch Set: Revert "exclude ios" Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/dom_distiller/content/renderer/distillability_agent.h" 5 #include "components/dom_distiller/content/renderer/distillability_agent.h"
6 6
7 #include "base/metrics/histogram_macros.h" 7 #include "base/metrics/histogram_macros.h"
8 #include "base/strings/string_util.h" 8 #include "base/strings/string_util.h"
9 #include "components/dom_distiller/content/common/distillability_service.mojom.h" 9 #include "components/dom_distiller/content/common/distillability_service.mojom.h"
10 #include "components/dom_distiller/core/distillable_page_detector.h" 10 #include "components/dom_distiller/core/distillable_page_detector.h"
(...skipping 23 matching lines...) Expand all
34 BLACKLISTED, 34 BLACKLISTED,
35 TOO_SHORT, 35 TOO_SHORT,
36 NOT_REJECTED, 36 NOT_REJECTED,
37 REJECTION_BUCKET_BOUNDARY 37 REJECTION_BUCKET_BOUNDARY
38 }; 38 };
39 39
40 // Returns whether it is necessary to send updates back to the browser. 40 // Returns whether it is necessary to send updates back to the browser.
41 // The number of updates can be from 0 to 2. See the tests in 41 // The number of updates can be from 0 to 2. See the tests in
42 // "distillable_page_utils_browsertest.cc". 42 // "distillable_page_utils_browsertest.cc".
43 // Most heuristics types only require one update after parsing. 43 // Most heuristics types only require one update after parsing.
44 // Adaboost is the only one doing the second update, which is after loading. 44 // Adaboost-based heuristics are the only ones doing the second update,
45 // which is after loading.
45 bool NeedToUpdate(bool is_loaded) { 46 bool NeedToUpdate(bool is_loaded) {
46 switch (GetDistillerHeuristicsType()) { 47 switch (GetDistillerHeuristicsType()) {
47 case DistillerHeuristicsType::ALWAYS_TRUE: 48 case DistillerHeuristicsType::ALWAYS_TRUE:
48 return !is_loaded; 49 return !is_loaded;
49 case DistillerHeuristicsType::OG_ARTICLE: 50 case DistillerHeuristicsType::OG_ARTICLE:
50 return !is_loaded; 51 return !is_loaded;
51 case DistillerHeuristicsType::ADABOOST_MODEL: 52 case DistillerHeuristicsType::ADABOOST_MODEL:
53 case DistillerHeuristicsType::ALL_ARTICLES:
52 return true; 54 return true;
53 case DistillerHeuristicsType::NONE: 55 case DistillerHeuristicsType::NONE:
54 default: 56 default:
55 return false; 57 return false;
56 } 58 }
57 } 59 }
58 60
59 // Returns whether this update is the last one for the page. 61 // Returns whether this update is the last one for the page.
60 bool IsLast(bool is_loaded) { 62 bool IsLast(bool is_loaded) {
61 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL) 63 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL ||
64 GetDistillerHeuristicsType() == DistillerHeuristicsType::ALL_ARTICLES)
62 return is_loaded; 65 return is_loaded;
63 66
64 return true; 67 return true;
65 } 68 }
66 69
67 bool IsBlacklisted(const GURL& url) { 70 bool IsBlacklisted(const GURL& url) {
68 for (size_t i = 0; i < arraysize(kBlacklist); ++i) { 71 for (size_t i = 0; i < arraysize(kBlacklist); ++i) {
69 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) { 72 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) {
70 return true; 73 return true;
71 } 74 }
72 } 75 }
73 return false; 76 return false;
74 } 77 }
75 78
76 bool IsDistillablePageAdaboost(WebDocument& doc, 79 bool IsDistillablePageAdaboost(WebDocument& doc,
77 const DistillablePageDetector* detector, 80 const DistillablePageDetector* detector,
78 const DistillablePageDetector* long_page, 81 const DistillablePageDetector* long_page,
79 bool is_last) { 82 bool is_last,
83 bool exclude_mobile) {
80 WebDistillabilityFeatures features = doc.DistillabilityFeatures(); 84 WebDistillabilityFeatures features = doc.DistillabilityFeatures();
81 GURL parsed_url(doc.Url()); 85 GURL parsed_url(doc.Url());
82 if (!parsed_url.is_valid()) { 86 if (!parsed_url.is_valid()) {
83 return false; 87 return false;
84 } 88 }
85 std::vector<double> derived = CalculateDerivedFeatures( 89 std::vector<double> derived = CalculateDerivedFeatures(
86 features.open_graph, parsed_url, features.element_count, 90 features.open_graph, parsed_url, features.element_count,
87 features.anchor_count, features.form_count, features.moz_score, 91 features.anchor_count, features.form_count, features.moz_score,
88 features.moz_score_all_sqrt, features.moz_score_all_linear); 92 features.moz_score_all_sqrt, features.moz_score_all_linear);
89 double score = detector->Score(derived) - detector->GetThreshold(); 93 double score = detector->Score(derived) - detector->GetThreshold();
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 TOO_SHORT, REJECTION_BUCKET_BOUNDARY); 142 TOO_SHORT, REJECTION_BUCKET_BOUNDARY);
139 } else { 143 } else {
140 UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", 144 UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection",
141 NOT_REJECTED, REJECTION_BUCKET_BOUNDARY); 145 NOT_REJECTED, REJECTION_BUCKET_BOUNDARY);
142 } 146 }
143 } 147 }
144 148
145 if (blacklisted) { 149 if (blacklisted) {
146 return false; 150 return false;
147 } 151 }
148 if (features.is_mobile_friendly) { 152 if (exclude_mobile && features.is_mobile_friendly) {
149 return false; 153 return false;
150 } 154 }
151 return distillable && long_article; 155 return distillable && long_article;
152 } 156 }
153 157
154 bool IsDistillablePage(WebDocument& doc, bool is_last) { 158 bool IsDistillablePage(WebDocument& doc, bool is_last) {
155 switch (GetDistillerHeuristicsType()) { 159 switch (GetDistillerHeuristicsType()) {
156 case DistillerHeuristicsType::ALWAYS_TRUE: 160 case DistillerHeuristicsType::ALWAYS_TRUE:
157 return true; 161 return true;
158 case DistillerHeuristicsType::OG_ARTICLE: 162 case DistillerHeuristicsType::OG_ARTICLE:
159 return doc.DistillabilityFeatures().open_graph; 163 return doc.DistillabilityFeatures().open_graph;
160 case DistillerHeuristicsType::ADABOOST_MODEL: 164 case DistillerHeuristicsType::ADABOOST_MODEL:
161 return IsDistillablePageAdaboost(doc, 165 return IsDistillablePageAdaboost(
162 DistillablePageDetector::GetNewModel(), 166 doc, DistillablePageDetector::GetNewModel(),
163 DistillablePageDetector::GetLongPageModel(), is_last); 167 DistillablePageDetector::GetLongPageModel(), is_last, true);
168 case DistillerHeuristicsType::ALL_ARTICLES:
169 return IsDistillablePageAdaboost(
170 doc, DistillablePageDetector::GetNewModel(),
171 DistillablePageDetector::GetLongPageModel(), is_last, false);
164 case DistillerHeuristicsType::NONE: 172 case DistillerHeuristicsType::NONE:
165 default: 173 default:
166 return false; 174 return false;
167 } 175 }
168 } 176 }
169 177
170 } // namespace 178 } // namespace
171 179
172 DistillabilityAgent::DistillabilityAgent( 180 DistillabilityAgent::DistillabilityAgent(
173 content::RenderFrame* render_frame) 181 content::RenderFrame* render_frame)
(...skipping 30 matching lines...) Expand all
204 IsDistillablePage(doc, is_last), is_last); 212 IsDistillablePage(doc, is_last), is_last);
205 } 213 }
206 214
207 DistillabilityAgent::~DistillabilityAgent() {} 215 DistillabilityAgent::~DistillabilityAgent() {}
208 216
209 void DistillabilityAgent::OnDestruct() { 217 void DistillabilityAgent::OnDestruct() {
210 delete this; 218 delete this;
211 } 219 }
212 220
213 } // namespace dom_distiller 221 } // namespace dom_distiller
OLD NEW
« no previous file with comments | « chrome/browser/flag_descriptions.cc ('k') | components/dom_distiller/core/dom_distiller_switches.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698