jmeter-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pmoua...@apache.org
Subject svn commit: r1754678 - in /jmeter/trunk: bin/jmeter.properties src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java xdocs/changes.xml xdocs/usermanual/properties_reference.xml
Date Sun, 31 Jul 2016 18:50:01 GMT
Author: pmouawad
Date: Sun Jul 31 18:50:01 2016
New Revision: 1754678

URL: http://svn.apache.org/viewvc?rev=1754678&view=rev
Log:
Bug 59885 - Optimize css parsing for embedded resources download by introducing a cache 
Based on PR 219 contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com)
This closes #219 on github.
Bugzilla Id: 59885

Modified:
    jmeter/trunk/bin/jmeter.properties
    jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java
    jmeter/trunk/xdocs/changes.xml
    jmeter/trunk/xdocs/usermanual/properties_reference.xml

Modified: jmeter/trunk/bin/jmeter.properties
URL: http://svn.apache.org/viewvc/jmeter/trunk/bin/jmeter.properties?rev=1754678&r1=1754677&r2=1754678&view=diff
==============================================================================
--- jmeter/trunk/bin/jmeter.properties (original)
+++ jmeter/trunk/bin/jmeter.properties Sun Jul 31 18:50:01 2016
@@ -726,6 +726,13 @@ HTTPResponse.parsers=htmlParser wmlParse
 # CSS Parser based on ph-css
 cssParser.className=org.apache.jmeter.protocol.http.parser.CssParser
 cssParser.types=text/css
+
+# CSS parser LRU cache size
+# This cache stores the URLs found in a CSS to avoid continuously parsing the CSS
+# By default the cache size is 400
+# It can be disabled by setting its value to 0
+#css.parser.cache.size=400
+
 #---------------------------------------------------------------------------
 # HTML Parser configuration
 #---------------------------------------------------------------------------

Modified: jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java
URL: http://svn.apache.org/viewvc/jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java?rev=1754678&r1=1754677&r2=1754678&view=diff
==============================================================================
--- jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java (original)
+++ jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java Sun
Jul 31 18:50:01 2016
@@ -21,9 +21,13 @@ package org.apache.jmeter.protocol.http.
 import java.net.URL;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.collections.map.LRUMap;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.jmeter.util.JMeterUtils;
 import org.apache.jorphan.logging.LoggingManager;
@@ -50,7 +54,20 @@ import com.helger.css.reader.errorhandle
 public class CssParser implements LinkExtractorParser {
     private static final boolean IGNORE_UNRECOVERABLE_PARSING_ERROR = JMeterUtils.getPropDefault("httpsampler.ignore_failed_embedded_resource",
false); //$NON-NLS-1$
     private static final Logger LOG = LoggingManager.getLoggerForClass();
+    
+    /**
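+     * Maximum number of entries kept in the CSS URL cache (property css.parser.cache.size, default 400); a value of 0 or less disables the cache.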
+     */
+    private static final int CSS_URL_CACHE_MAX_SIZE = JMeterUtils.getPropDefault("css.parser.cache.size",
400);
+    
+    /**
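+     * LRU cache of the URLs extracted from a CSS, keyed by the MD5 hex digest of the CSS bytes; null when the cache is disabled.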
+     */
+    @SuppressWarnings("unchecked")
+    private static Map<String, URLCollection> CSS_URL_CACHE = 
+            CSS_URL_CACHE_MAX_SIZE > 0 ? Collections.synchronizedMap(new LRUMap(CSS_URL_CACHE_MAX_SIZE))
: null;
 
+    
     private static final class CustomLoggingCSSParseExceptionCallback extends LoggingCSSParseExceptionCallback
{
         /**
          * 
@@ -76,6 +93,7 @@ public class CssParser implements LinkEx
             }
         }
     }
+    
     /**
      * 
      */
@@ -93,40 +111,55 @@ public class CssParser implements LinkEx
     public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] data,
             final URL baseUrl, String encoding) throws LinkExtractorParseException {
         try {
-            String cssContent = new String(data, encoding);
-            final CascadingStyleSheet aCSS = CSSReader.readFromStringStream(cssContent,
-                        new CSSReaderSettings()
-                            .setBrowserCompliantMode(true)
-                            .setFallbackCharset(Charset.forName(encoding))
-                            .setCSSVersion (ECSSVersion.CSS30)
-                            .setCustomErrorHandler(new LoggingCSSParseErrorHandler())
-                            .setCustomExceptionHandler (new CustomLoggingCSSParseExceptionCallback(baseUrl)));
-            final List<URLString> list = new ArrayList<>();
-            final URLCollection urlCollection = new URLCollection(list);
-            if(aCSS != null) {
-                CSSVisitor.visitCSSUrl(aCSS, new DefaultCSSUrlVisitor() {
-                    @Override
-                    public void onImport(final CSSImportRule importRule) {
-                        String location = importRule.getLocationString();
-                        if(!StringUtils.isEmpty(location)) {
-                            urlCollection.addURL(location, baseUrl);
+            boolean cacheEnabled = CSS_URL_CACHE_MAX_SIZE > 0;
+            String md5Key = null;
+            URLCollection urlCollection = null;
+            if(cacheEnabled) {
+                md5Key = DigestUtils.md5Hex(data);
+                urlCollection = CSS_URL_CACHE.get(md5Key);                
+            }
+            
+            if(urlCollection == null) {
+                String cssContent = new String(data, encoding);
+                final CascadingStyleSheet aCSS = CSSReader.readFromStringStream(cssContent,
+                            new CSSReaderSettings()
+                                .setBrowserCompliantMode(true)
+                                .setFallbackCharset(Charset.forName(encoding))
+                                .setCSSVersion (ECSSVersion.CSS30)
+                                .setCustomErrorHandler(new LoggingCSSParseErrorHandler())
+                                .setCustomExceptionHandler (new CustomLoggingCSSParseExceptionCallback(baseUrl)));
+                final List<URLString> list = new ArrayList<>();
+                urlCollection = new URLCollection(list);
+                final URLCollection localCollection = urlCollection;
+                if(aCSS != null) {
+                    CSSVisitor.visitCSSUrl(aCSS, new DefaultCSSUrlVisitor() {
+                        @Override
+                        public void onImport(final CSSImportRule importRule) {
+                            String location = importRule.getLocationString();
+                            if(!StringUtils.isEmpty(location)) {
+                                localCollection.addURL(location, baseUrl);
+                            }
                         }
+                        // Call for URLs outside of URLs
+                        @Override
+                        public void onUrlDeclaration(
+                                final ICSSTopLevelRule aTopLevelRule,
+                                final CSSDeclaration aDeclaration,
+                                final CSSExpressionMemberTermURI aURITerm) {
+                            // NOOP
+                            // Browser fetch such urls only when CSS rule matches
+                            // so we disable this code
+                            //urlCollection.addURL(aURITerm.getURIString(), baseUrl);
+                        }
+                    });
+                    if(cacheEnabled) {
+                        CSS_URL_CACHE.put(md5Key, urlCollection);
                     }
-                    // Call for URLs outside of URLs
-                    @Override
-                    public void onUrlDeclaration(
-                            final ICSSTopLevelRule aTopLevelRule,
-                            final CSSDeclaration aDeclaration,
-                            final CSSExpressionMemberTermURI aURITerm) {
-                        // NOOP
-                        // Browser fetch such urls only when CSS rule matches
-                        // so we disable this code
-                        //urlCollection.addURL(aURITerm.getURIString(), baseUrl);
-                    }
-                });
-            } else {
-               LOG.warn("Failed parsing url:"+baseUrl+", got null CascadingStyleSheet");
+                } else {
+                   LOG.warn("Failed parsing url:"+baseUrl+", got null CascadingStyleSheet");
+                }
             }
+            
             if(LOG.isDebugEnabled()) {
                 StringBuilder builder = new StringBuilder();
                 for (Iterator<URL> iterator = urlCollection.iterator(); iterator.hasNext();)
{
@@ -135,6 +168,7 @@ public class CssParser implements LinkEx
                 }
                 LOG.debug("Parsed:"+baseUrl+", got:"+builder.toString());
             }
+            
             return urlCollection.iterator();
         } catch (Exception e) {
             throw new LinkExtractorParseException(e);

Modified: jmeter/trunk/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/jmeter/trunk/xdocs/changes.xml?rev=1754678&r1=1754677&r2=1754678&view=diff
==============================================================================
--- jmeter/trunk/xdocs/changes.xml [utf-8] (original)
+++ jmeter/trunk/xdocs/changes.xml [utf-8] Sun Jul 31 18:50:01 2016
@@ -65,7 +65,7 @@ Summary
 <ch_section>Incompatible changes</ch_section>
 
 <ul>
-    <li>Sample change...</li>
+    <li>A cache for CSS Parsing of URLs has been introduced in this version; it is
enabled by default. It is controlled by property <code>css.parser.cache.size</code>.
It can be disabled by setting its value to 0. See <bugzilla>59885</bugzilla></li>
 </ul>
 
 <h3>Deprecated and removed elements</h3>
@@ -80,6 +80,7 @@ Summary
 <h3>HTTP Samplers and Test Script Recorder</h3>
 <ul>
     <li><bug>59882</bug>Reduce memory allocations for better throughput.
Contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) through <pr>217</pr></li>
+    <li><bug>59885</bug>Optimize css parsing for embedded resources download
by introducing a cache. Contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) through
<pr>219</pr></li>
 </ul>
 
 <h3>Other samplers</h3>

Modified: jmeter/trunk/xdocs/usermanual/properties_reference.xml
URL: http://svn.apache.org/viewvc/jmeter/trunk/xdocs/usermanual/properties_reference.xml?rev=1754678&r1=1754677&r2=1754678&view=diff
==============================================================================
--- jmeter/trunk/xdocs/usermanual/properties_reference.xml (original)
+++ jmeter/trunk/xdocs/usermanual/properties_reference.xml Sun Jul 31 18:50:01 2016
@@ -445,14 +445,18 @@ Uncomment this line if you put anything
 </section>
 <section name="&sect-num;.24 HTML Parser configuration" anchor="parser_config">
 <properties>
-<property name="HTTPResponse.parsers"> Space-separated list of parser groups<br/>,
defaults to:htmlParser wmlParser cssParser</property>
-<property name="cssParser.className"> for each parser, there should be a parser.types
and a parser.className property<br/> CSS Parser based on ph-css<br/>, defaults
to:org.apache.jmeter.protocol.http.parser.CssParser</property>
-<property name="cssParser.types">, defaults to:text/css</property>
-<property name=" see https://bz.apache.org/bugzilla/show_bug.cgi?id"> Define the HTML
parser to be used.<br/> Default parser:<br/> This new parser (since 2.10) should
perform better than all others<br/>, defaults to:55632</property>
-<property name="htmlParser.className"> Do not comment this property<br/>, defaults
to:org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser</property>
-<property name="htmlParser.className"> Other parsers:<br/> Default parser before
2.10<br/>, defaults to:org.apache.jmeter.protocol.http.parser.JTidyHTMLParser</property>
-<property name="htmlParser.className"> Note that Regexp extractor may detect references
that have been commented out.<br/> In many cases it will work OK, but you should be
aware that it may generate <br/> additional references.<br/>, defaults to:org.apache.jmeter.protocol.http.parser.RegexpHTMLParser</property>
-<property name="htmlParser.className"> This parser is based on JSoup, it should be
the most accurate but less performant<br/> than LagartoBasedHtmlParser<br/>, defaults
to:org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser</property>
+<property name="HTTPResponse.parsers">Space-separated list of parser groups<br/>,
defaults to:htmlParser wmlParser cssParser. For each parser, there should be a parser.types
and a parser.className property</property>
+<property name="cssParser.className"> CSS Parser based on ph-css<br/>, defaults
to:org.apache.jmeter.protocol.http.parser.CssParser</property>
+<property name="cssParser.types">content types handled by cssParser, defaults to:text/css</property>
+<property name="css.parser.cache.size">CSS parser LRU cache size. This cache stores
the URLs found in a CSS to avoid continuously parsing the CSS. By default the cache size is
400. It can be disabled by setting its value to 0.</property>
+<property name="htmlParser.className">Define the HTML parser to be used. This new parser
(since 2.10) should perform better than all others. see https://bz.apache.org/bugzilla/show_bug.cgi?id=55632.
Do not comment this property<br/>, defaults to:org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser</property>
+Other parsers:<br/>
+<ul> 
+<li>org.apache.jmeter.protocol.http.parser.JTidyHTMLParser : Default parser before
2.10</li>
+<li>org.apache.jmeter.protocol.http.parser.RegexpHTMLParser : Note that Regexp extractor
may detect references that have been commented out.<br/> In many cases it will work
OK, but you should be aware that it may generate additional references.</li>
+<li>org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser : This parser is based
on JSoup, it should be the most accurate but less performant than LagartoBasedHtmlParser,
defaults to:org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser
+</li>
+</ul>
 <property name="htmlParser.types">Used by HTTPSamplerBase to associate htmlParser with
content types below <br/>, defaults to:text/html application/xhtml+xml application/xml
text/xml</property>
 <property name="wmlParser.className">, defaults to:org.apache.jmeter.protocol.http.parser.RegexpHTMLParser</property>
 <property name="wmlParser.types">Used by HTTPSamplerBase to associate wmlParser with
content types below <br/>, defaults to:text/vnd.wap.wml </property>



Mime
View raw message