Browse Source

Update URLPattern to its latest version

Cheng Zhao 8 years ago
parent
commit
6763977316

+ 3 - 3
atom/browser/api/atom_api_web_request.cc

@@ -20,13 +20,13 @@ using content::BrowserThread;
 namespace mate {
 
 template<>
-struct Converter<extensions::URLPattern> {
+struct Converter<URLPattern> {
+  // Converts the V8 value |val| to a string and parses it as a URL match
+  // pattern into |out|. Returns false if |val| cannot be converted to a
+  // string or if the pattern fails to parse.
   static bool FromV8(v8::Isolate* isolate, v8::Local<v8::Value> val,
-                     extensions::URLPattern* out) {
+                     URLPattern* out) {
     std::string pattern;
     if (!ConvertFromV8(isolate, val, &pattern))
       return false;
-    return out->Parse(pattern) == extensions::URLPattern::PARSE_SUCCESS;
+    // The updated URLPattern default-constructs with SCHEME_NONE instead of
+    // SCHEME_ALL, which makes Parse() reject every literal scheme. Explicitly
+    // allow all schemes so |out| keeps accepting the same patterns as before
+    // the update, regardless of how the caller constructed it.
+    out->SetValidSchemes(URLPattern::SCHEME_ALL);
+    return out->Parse(pattern) == URLPattern::PARSE_SUCCESS;
   }
 };
 

+ 1 - 3
atom/browser/net/atom_network_delegate.h

@@ -19,13 +19,11 @@
 #include "net/http/http_request_headers.h"
 #include "net/http/http_response_headers.h"
 
-namespace extensions {
 class URLPattern;
-}
 
 namespace atom {
 
-using URLPatterns = std::set<extensions::URLPattern>;
+using URLPatterns = std::set<URLPattern>;
 
 const char* ResourceTypeToString(content::ResourceType type);
 

+ 94 - 80
chromium_src/extensions/common/url_pattern.cc

@@ -4,8 +4,11 @@
 
 #include "extensions/common/url_pattern.h"
 
+#include <stddef.h>
+
 #include <ostream>
 
+#include "base/macros.h"
 #include "base/strings/pattern.h"
 #include "base/strings/string_number_conversions.h"
 #include "base/strings/string_piece.h"
@@ -17,31 +20,28 @@
 #include "url/gurl.h"
 #include "url/url_util.h"
 
-const char extensions::URLPattern::kAllUrlsPattern[] = "<all_urls>";
+const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
 const char kExtensionScheme[] = "chrome-extension";
 
 namespace {
 
 // TODO(aa): What about more obscure schemes like data: and javascript: ?
 // Note: keep this array in sync with kValidSchemeMasks.
-const char* kValidSchemes[] = {
-    url::kHttpScheme,
-    url::kHttpsScheme,
-    url::kFileScheme,
-    url::kFtpScheme,
-    content::kChromeUIScheme,
-    kExtensionScheme,
+const char* const kValidSchemes[] = {
+    url::kHttpScheme,         url::kHttpsScheme,
+    url::kFileScheme,         url::kFtpScheme,
+    content::kChromeUIScheme, kExtensionScheme,
     url::kFileSystemScheme,
 };
 
 const int kValidSchemeMasks[] = {
   // Entry i is the URLPattern scheme bitmask for kValidSchemes[i]; the two
   // arrays are listed in the same order and must stay the same length.
-  extensions::URLPattern::SCHEME_HTTP,
-  extensions::URLPattern::SCHEME_HTTPS,
-  extensions::URLPattern::SCHEME_FILE,
-  extensions::URLPattern::SCHEME_FTP,
-  extensions::URLPattern::SCHEME_CHROMEUI,
-  extensions::URLPattern::SCHEME_EXTENSION,
-  extensions::URLPattern::SCHEME_FILESYSTEM,
+  URLPattern::SCHEME_HTTP,
+  URLPattern::SCHEME_HTTPS,
+  URLPattern::SCHEME_FILE,
+  URLPattern::SCHEME_FTP,
+  URLPattern::SCHEME_CHROMEUI,
+  URLPattern::SCHEME_EXTENSION,
+  URLPattern::SCHEME_FILESYSTEM,
 };
 
 static_assert(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
@@ -70,26 +70,26 @@ const char* const kParseResultMessages[] = {
   kParseErrorInvalidHost,
 };
 
-static_assert(extensions::URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
+static_assert(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
               "must add message for each parse result");
 
 const char kPathSeparator[] = "/";
 
-bool IsStandardScheme(const std::string& scheme) {
+bool IsStandardScheme(base::StringPiece scheme) {
   // Returns true if |scheme| is the wildcard "*" or is reported as a standard
   // scheme by url::IsStandard().
   // "*" gets the same treatment as a standard scheme.
   if (scheme == "*")
     return true;
 
   // The scheme is handed over as a pointer plus an explicit [0, length)
   // component, so the length travels with the pointer (StringPiece::data()
   // need not be NUL-terminated).
-  return url::IsStandard(scheme.c_str(),
+  return url::IsStandard(scheme.data(),
                          url::Component(0, static_cast<int>(scheme.length())));
 }
 
-bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
+bool IsValidPortForScheme(base::StringPiece scheme, base::StringPiece port) {
   if (port == "*")
     return true;
 
   // Only accept non-wildcard ports if the scheme uses ports.
-  if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
+  if (url::DefaultPortForScheme(scheme.data(), scheme.length()) ==
       url::PORT_UNSPECIFIED) {
     return false;
   }
@@ -107,17 +107,23 @@ bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
 // the path will have only a single wildcard at the end. This makes figuring
 // out overlap much easier. It seems like there is probably a computer-sciency
 // way to solve the general case, but we don't need that yet.
-std::string StripTrailingWildcard(const std::string& path) {
-  size_t wildcard_index = path.find('*');
-  size_t path_last = path.size() - 1;
-  return wildcard_index == path_last ? path.substr(0, path_last) : path;
+base::StringPiece StripTrailingWildcard(base::StringPiece path) {
   // Returns |path| without its final character when that character is '*';
   // otherwise returns |path| unchanged. The returned StringPiece refers to
   // the same underlying characters as the argument.
   // NOTE(review): the removed version only stripped when the *first* '*' in
   // the path was also its last character; this version strips a trailing '*'
   // even when an earlier '*' exists (e.g. "/a*b*" becomes "/a*b").
+  if (path.ends_with("*"))
+    path.remove_suffix(1);
+  return path;
+}
+
+// Removes trailing dot from |host_piece| if any.
+base::StringPiece CanonicalizeHostForMatching(base::StringPiece host_piece) {
   // At most one trailing '.' is removed; a host without a trailing dot
   // (including an empty one) is returned unchanged.
+  if (host_piece.ends_with("."))
+    host_piece.remove_suffix(1);
+  return host_piece;
 }
 
 }  // namespace
 
-namespace extensions {
 // static
-bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
+bool URLPattern::IsValidSchemeForExtensions(base::StringPiece scheme) {
   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
     if (scheme == kValidSchemes[i])
       return true;
@@ -126,7 +132,7 @@ bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
 }
 
 URLPattern::URLPattern()
     // Default state: no schemes are valid, match-all-URLs and subdomain
     // matching are off, and the port is the wildcard "*".
     // NOTE(review): this change flips the default from SCHEME_ALL to
     // SCHEME_NONE; code that default-constructs a pattern and then calls
     // Parse() presumably needs SetValidSchemes() first -- confirm against
     // call sites.
-    : valid_schemes_(SCHEME_ALL),
+    : valid_schemes_(SCHEME_NONE),
       match_all_urls_(false),
       match_subdomains_(false),
       port_("*") {}
@@ -137,7 +143,7 @@ URLPattern::URLPattern(int valid_schemes)
       match_subdomains_(false),
       port_("*") {}
 
-URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
+URLPattern::URLPattern(int valid_schemes, base::StringPiece pattern)
     // Strict error checking is used, because this constructor is only
     // appropriate when we know |pattern| is valid.
     : valid_schemes_(valid_schemes),
@@ -149,6 +155,8 @@ URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
     NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result;
 }
 
+URLPattern::URLPattern(const URLPattern& other) = default;
+
 URLPattern::~URLPattern() {
 }
 
@@ -168,7 +176,7 @@ std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) {
   return out << '"' << url_pattern.GetAsString() << '"';
 }
 
-URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
+URLPattern::ParseResult URLPattern::Parse(base::StringPiece pattern) {
   spec_.clear();
   SetMatchAllURLs(false);
   SetMatchSubdomains(false);
@@ -185,12 +193,12 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
   bool has_standard_scheme_separator = true;
 
   // Some urls also use ':' alone as the scheme separator.
-  if (scheme_end_pos == std::string::npos) {
+  if (scheme_end_pos == base::StringPiece::npos) {
     scheme_end_pos = pattern.find(':');
     has_standard_scheme_separator = false;
   }
 
-  if (scheme_end_pos == std::string::npos)
+  if (scheme_end_pos == base::StringPiece::npos)
     return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
 
   if (!SetScheme(pattern.substr(0, scheme_end_pos)))
@@ -214,7 +222,7 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
     path_start_pos = host_start_pos;
   } else if (scheme_ == url::kFileScheme) {
     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
-    if (host_end_pos == std::string::npos) {
+    if (host_end_pos == base::StringPiece::npos) {
       // Allow hostname omission.
       // e.g. file://* is interpreted as file:///*,
       // file://foo* is interpreted as file:///foo*.
@@ -231,10 +239,13 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
     if (host_start_pos == host_end_pos)
       return PARSE_ERROR_EMPTY_HOST;
 
-    if (host_end_pos == std::string::npos)
+    if (host_end_pos == base::StringPiece::npos)
       return PARSE_ERROR_EMPTY_PATH;
 
-    host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
+    // TODO(devlin): This whole series is expensive. Luckily we don't do it
+    // *too* often, but it could be optimized.
+    pattern.substr(host_start_pos, host_end_pos - host_start_pos)
+        .CopyToString(&host_);
 
     // The first component can optionally be '*' to match all subdomains.
     std::vector<std::string> host_components = base::SplitString(
@@ -282,9 +293,9 @@ void URLPattern::SetValidSchemes(int valid_schemes) {
   valid_schemes_ = valid_schemes;
 }
 
-void URLPattern::SetHost(const std::string& host) {
+void URLPattern::SetHost(base::StringPiece host) {
   // Replaces the stored host with a copy of |host|.
   // |spec_| is cleared first; presumably it is the cached string form that
   // GetAsString() rebuilds -- confirm against GetAsString().
   spec_.clear();
-  host_ = host;
+  host.CopyToString(&host_);
 }
 
 void URLPattern::SetMatchAllURLs(bool val) {
@@ -304,9 +315,9 @@ void URLPattern::SetMatchSubdomains(bool val) {
   match_subdomains_ = val;
 }
 
-bool URLPattern::SetScheme(const std::string& scheme) {
+bool URLPattern::SetScheme(base::StringPiece scheme) {
   spec_.clear();
-  scheme_ = scheme;
+  scheme.CopyToString(&scheme_);
   if (scheme_ == "*") {
     valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
   } else if (!IsValidScheme(scheme_)) {
@@ -315,7 +326,7 @@ bool URLPattern::SetScheme(const std::string& scheme) {
   return true;
 }
 
-bool URLPattern::IsValidScheme(const std::string& scheme) const {
+bool URLPattern::IsValidScheme(base::StringPiece scheme) const {
   if (valid_schemes_ == SCHEME_ALL)
     return true;
 
@@ -327,18 +338,18 @@ bool URLPattern::IsValidScheme(const std::string& scheme) const {
   return false;
 }
 
-void URLPattern::SetPath(const std::string& path) {
+void URLPattern::SetPath(base::StringPiece path) {
   // Stores a copy of |path| in |path_| and derives |path_escaped_| from it,
   // with every backslash and '?' prefixed by a backslash.
   spec_.clear();
-  path_ = path;
+  path.CopyToString(&path_);
   path_escaped_ = path_;
   // Backslashes are doubled before '?' is escaped so that the backslash
   // inserted in front of '?' is not itself doubled.
   base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
   base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
 }
 
-bool URLPattern::SetPort(const std::string& port) {
+bool URLPattern::SetPort(base::StringPiece port) {
   spec_.clear();
   if (IsValidPortForScheme(scheme_, port)) {
-    port_ = port;
+    port.CopyToString(&port_);
     return true;
   }
   return false;
@@ -354,15 +365,17 @@ bool URLPattern::MatchesURL(const GURL& test) const {
     test_url = test.inner_url();
   }
 
-  if (!MatchesScheme(test_url->scheme()))
+  if (!MatchesScheme(test_url->scheme_piece()))
     return false;
 
   if (match_all_urls_)
     return true;
 
   std::string path_for_request = test.PathForRequest();
-  if (has_inner_url)
-    path_for_request = test_url->path() + path_for_request;
+  if (has_inner_url) {
+    path_for_request = base::StringPrintf("%s%s", test_url->path_piece().data(),
+                                          path_for_request.c_str());
+  }
 
   return MatchesSecurityOriginHelper(*test_url) &&
          MatchesPath(path_for_request);
@@ -387,30 +400,33 @@ bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
   return MatchesSecurityOriginHelper(*test_url);
 }
 
-bool URLPattern::MatchesScheme(const std::string& test) const {
+bool URLPattern::MatchesScheme(base::StringPiece test) const {
   // Returns true when |test| passes IsValidScheme() and this pattern's scheme
   // is either the wildcard "*" or exactly equal to |test|.
   if (!IsValidScheme(test))
     return false;
 
   return scheme_ == "*" || test == scheme_;
 }
 
-bool URLPattern::MatchesHost(const std::string& host) const {
-  std::string test(url::kHttpScheme);
-  test += url::kStandardSchemeSeparator;
-  test += host;
-  test += "/";
-  return MatchesHost(GURL(test));
+bool URLPattern::MatchesHost(base::StringPiece host) const {
+  // Builds a throwaway "http://<host>/" URL so that the GURL overload can do
+  // the actual host comparison.
+  // TODO(devlin): This is a bit sad. Parsing urls is expensive.
+  //
+  // |host| is a StringPiece, whose data() is not guaranteed to be
+  // NUL-terminated, so it is appended with an explicit length rather than
+  // handed to a printf-style "%s", which would read past the end of the view.
+  std::string test(url::kHttpScheme);
+  test += url::kStandardSchemeSeparator;
+  test.append(host.data(), host.length());
+  test += "/";
+  return MatchesHost(GURL(test));
 }
 
 bool URLPattern::MatchesHost(const GURL& test) const {
+  const base::StringPiece test_host(
+      CanonicalizeHostForMatching(test.host_piece()));
+  const base::StringPiece pattern_host(CanonicalizeHostForMatching(host_));
+
   // If the hosts are exactly equal, we have a match.
-  if (test.host() == host_)
+  if (test_host == pattern_host)
     return true;
 
   // If we're matching subdomains, and we have no host in the match pattern,
   // that means that we're matching all hosts, which means we have a match no
   // matter what the test host is.
-  if (match_subdomains_ && host_.empty())
+  if (match_subdomains_ && pattern_host.empty())
     return true;
 
   // Otherwise, we can only match if our match pattern matches subdomains.
@@ -423,14 +439,13 @@ bool URLPattern::MatchesHost(const GURL& test) const {
     return false;
 
   // Check if the test host is a subdomain of our host.
-  if (test.host().length() <= (host_.length() + 1))
+  if (test_host.length() <= (pattern_host.length() + 1))
     return false;
 
-  if (test.host().compare(test.host().length() - host_.length(),
-                          host_.length(), host_) != 0)
+  if (!test_host.ends_with(pattern_host))
     return false;
 
-  return test.host()[test.host().length() - host_.length() - 1] == '.';
+  return test_host[test_host.length() - pattern_host.length() - 1] == '.';
 }
 
 bool URLPattern::ImpliesAllHosts() const {
@@ -444,28 +459,24 @@ bool URLPattern::ImpliesAllHosts() const {
   if (!match_subdomains_)
     return false;
 
-  // If |host_| is a recognized TLD, this will be 0. We don't include private
-  // TLDs, so that, e.g., *.appspot.com does not imply all hosts.
-  size_t registry_length = net::registry_controlled_domains::GetRegistryLength(
-      host_,
-      net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
-      net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
   // If there was more than just a TLD in the host (e.g., *.foobar.com), it
-  // doesn't imply all hosts.
-  if (registry_length > 0)
+  // doesn't imply all hosts. We don't include private TLDs, so that, e.g.,
+  // *.appspot.com does not imply all hosts.
+  if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
+          host_, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
+          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES))
     return false;
 
   // At this point the host could either be just a TLD ("com") or some unknown
   // TLD-like string ("notatld"). To disambiguate between them construct a
-  // fake URL, and check the registry. This returns 0 if the TLD is
-  // unrecognized, or the length of the recognized TLD.
-  registry_length = net::registry_controlled_domains::GetRegistryLength(
-      base::StringPrintf("foo.%s", host_.c_str()),
+  // fake URL, and check the registry.
+  //
+  // If we recognized this TLD, then this is a pattern like *.com, and it
+  // should imply all hosts.
+  return net::registry_controlled_domains::HostHasRegistryControlledDomain(
+      "notatld." + host_,
       net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
       net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
-  // If we recognized this TLD, then this is a pattern like *.com, and it
-  // should imply all hosts. Otherwise, this doesn't imply all hosts.
-  return registry_length > 0;
 }
 
 bool URLPattern::MatchesSingleOrigin() const {
@@ -474,11 +485,16 @@ bool URLPattern::MatchesSingleOrigin() const {
   return !ImpliesAllHosts() && scheme_ != "*" && !match_subdomains_;
 }
 
-bool URLPattern::MatchesPath(const std::string& test) const {
+bool URLPattern::MatchesPath(base::StringPiece test) const {
   // Returns true if |test| followed by "/*" equals |path_escaped_|, or if
   // |test| matches |path_escaped_| under base::MatchPattern().
   // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
   // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
-  if (test + "/*" == path_escaped_)
+  // The below if is a no-copy way of doing (test + "/*" == path_escaped_).
+  if (path_escaped_.length() == test.length() + 2 &&
+      base::StartsWith(path_escaped_.c_str(), test,
+                       base::CompareCase::SENSITIVE) &&
+      base::EndsWith(path_escaped_, "/*", base::CompareCase::SENSITIVE)) {
     return true;
+  }
 
   // Otherwise fall back to wildcard matching against the escaped path.
   return base::MatchPattern(test, path_escaped_);
 }
@@ -516,7 +532,7 @@ const std::string& URLPattern::GetAsString() const {
   if (!path_.empty())
     spec += path_;
 
-  spec_ = spec;
+  spec_ = std::move(spec);
   return spec_;
 }
 
@@ -574,7 +590,7 @@ bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
   return true;
 }
 
-bool URLPattern::MatchesPortPattern(const std::string& port) const {
+bool URLPattern::MatchesPortPattern(base::StringPiece port) const {
   // A wildcard |port_| ("*") accepts any |port|; otherwise the stored port
   // must be exactly equal to |port|.
   return port_ == "*" || port_ == port;
 }
 
@@ -615,5 +631,3 @@ const char* URLPattern::GetParseResultString(
     URLPattern::ParseResult parse_result) {
   return kParseResultMessages[parse_result];
 }
-
-}  // namespace extensions

+ 15 - 15
chromium_src/extensions/common/url_pattern.h

@@ -9,9 +9,10 @@
 #include <string>
 #include <vector>
 
+#include "base/strings/string_piece.h"
+
 class GURL;
 
-namespace extensions {
 // A pattern that can be used to match URLs. A URLPattern is a very restricted
 // subset of URL syntax:
 //
@@ -82,15 +83,16 @@ class URLPattern {
   static const char kAllUrlsPattern[];
 
   // Returns true if the given |scheme| is considered valid for extensions.
-  static bool IsValidSchemeForExtensions(const std::string& scheme);
+  static bool IsValidSchemeForExtensions(base::StringPiece scheme);
 
   explicit URLPattern(int valid_schemes);
 
   // Convenience to construct a URLPattern from a string. If the string is not
   // known ahead of time, use Parse() instead, which returns success or failure.
-  URLPattern(int valid_schemes, const std::string& pattern);
+  URLPattern(int valid_schemes, base::StringPiece pattern);
 
   URLPattern();
+  URLPattern(const URLPattern& other);
   ~URLPattern();
 
   bool operator<(const URLPattern& other) const;
@@ -101,7 +103,7 @@ class URLPattern {
   // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
   // failure, this instance will have some intermediate values and is in an
   // invalid state.
-  ParseResult Parse(const std::string& pattern_str);
+  ParseResult Parse(base::StringPiece pattern_str);
 
   // Gets the bitmask of valid schemes.
   int valid_schemes() const { return valid_schemes_; }
@@ -110,7 +112,7 @@ class URLPattern {
   // Gets the host the pattern matches. This can be an empty string if the
   // pattern matches all hosts (the input was <scheme>://*/<whatever>).
   const std::string& host() const { return host_; }
-  void SetHost(const std::string& host);
+  void SetHost(base::StringPiece host);
 
   // Gets whether to match subdomains of host().
   bool match_subdomains() const { return match_subdomains_; }
@@ -119,7 +121,7 @@ class URLPattern {
   // Gets the path the pattern matches with the leading slash. This can have
   // embedded asterisks which are interpreted using glob rules.
   const std::string& path() const { return path_; }
-  void SetPath(const std::string& path);
+  void SetPath(base::StringPiece path);
 
   // Returns true if this pattern matches all urls.
   bool match_all_urls() const { return match_all_urls_; }
@@ -128,14 +130,14 @@ class URLPattern {
   // Sets the scheme for pattern matches. This can be a single '*' if the
   // pattern matches all valid schemes (as defined by the valid_schemes_
   // property). Returns false on failure (if the scheme is not valid).
-  bool SetScheme(const std::string& scheme);
+  bool SetScheme(base::StringPiece scheme);
   // Note: You should use MatchesScheme() instead of this getter unless you
   // absolutely need the exact scheme. This is exposed for testing.
   const std::string& scheme() const { return scheme_; }
 
   // Returns true if the specified scheme can be used in this URL pattern, and
   // false otherwise. Uses valid_schemes_ to determine validity.
-  bool IsValidScheme(const std::string& scheme) const;
+  bool IsValidScheme(base::StringPiece scheme) const;
 
   // Returns true if this instance matches the specified URL.
   bool MatchesURL(const GURL& test) const;
@@ -147,14 +149,14 @@ class URLPattern {
   // Note that if test is "filesystem", this may fail whereas MatchesURL
   // may succeed.  MatchesURL is smart enough to look at the inner_url instead
   // of the outer "filesystem:" part.
-  bool MatchesScheme(const std::string& test) const;
+  bool MatchesScheme(base::StringPiece test) const;
 
   // Returns true if |test| matches our host.
-  bool MatchesHost(const std::string& test) const;
+  bool MatchesHost(base::StringPiece test) const;
   bool MatchesHost(const GURL& test) const;
 
   // Returns true if |test| matches our path.
-  bool MatchesPath(const std::string& test) const;
+  bool MatchesPath(base::StringPiece test) const;
 
   // Returns true if the pattern is vague enough that it implies all hosts,
   // such as *://*/*.
@@ -168,7 +170,7 @@ class URLPattern {
   bool MatchesSingleOrigin() const;
 
   // Sets the port. Returns false if the port is invalid.
-  bool SetPort(const std::string& port);
+  bool SetPort(base::StringPiece port);
   const std::string& port() const { return port_; }
 
   // Returns a string representing this instance.
@@ -216,7 +218,7 @@ class URLPattern {
   bool MatchesSecurityOriginHelper(const GURL& test) const;
 
   // Returns true if our port matches the |port| pattern (it may be "*").
-  bool MatchesPortPattern(const std::string& port) const;
+  bool MatchesPortPattern(base::StringPiece port) const;
 
   // If the URLPattern contains a wildcard scheme, returns a list of
   // equivalent literal schemes, otherwise returns the current scheme.
@@ -259,6 +261,4 @@ std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern);
 
 typedef std::vector<URLPattern> URLPatternList;
 
-}  // namespace extensions
-
 #endif  // EXTENSIONS_COMMON_URL_PATTERN_H_