jmeter-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pmoua...@apache.org
Subject svn commit: r1529543 - in /jmeter/trunk: src/protocol/http/org/apache/jmeter/protocol/http/parser/ test/src/org/apache/jmeter/protocol/http/parser/ xdocs/
Date Sat, 05 Oct 2013 22:32:38 GMT
Author: pmouawad
Date: Sat Oct  5 22:32:38 2013
New Revision: 1529543

URL: http://svn.apache.org/r1529543
Log:
Bug 55632 - Have a new implementation of htmlParser for embedded resources parsing with better
performances
Bugzilla Id: 55632

Added:
    jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java
  (with props)
Modified:
    jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java
    jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java
    jmeter/trunk/xdocs/changes.xml

Modified: jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java
URL: http://svn.apache.org/viewvc/jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java?rev=1529543&r1=1529542&r2=1529543&view=diff
==============================================================================
--- jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java (original)
+++ jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java Sat Oct  5 22:32:38 2013
@@ -38,6 +38,9 @@ public abstract class HTMLParser {
     private static final Logger log = LoggingManager.getLoggerForClass();
 
     protected static final String ATT_BACKGROUND    = "background";// $NON-NLS-1$
+    protected static final String ATT_CODE          = "code";// $NON-NLS-1$
+    protected static final String ATT_CODEBASE      = "codebase";// $NON-NLS-1$
+    protected static final String ATT_DATA          = "data";// $NON-NLS-1$
     protected static final String ATT_HREF          = "href";// $NON-NLS-1$
     protected static final String ATT_REL           = "rel";// $NON-NLS-1$
     protected static final String ATT_SRC           = "src";// $NON-NLS-1$
@@ -47,6 +50,7 @@ public abstract class HTMLParser {
     protected static final String TAG_APPLET        = "applet";// $NON-NLS-1$
     protected static final String TAG_BASE          = "base";// $NON-NLS-1$
     protected static final String TAG_BGSOUND       = "bgsound";// $NON-NLS-1$
+    protected static final String TAG_BODY          = "body";// $NON-NLS-1$
     protected static final String TAG_EMBED         = "embed";// $NON-NLS-1$
     protected static final String TAG_FRAME         = "frame";// $NON-NLS-1$
     protected static final String TAG_IFRAME        = "iframe";// $NON-NLS-1$
@@ -58,12 +62,12 @@ public abstract class HTMLParser {
     protected static final String STYLESHEET        = "stylesheet";// $NON-NLS-1$
 
     // Cache of parsers - parsers must be re-usable
-    private static final Map<String, HTMLParser> parsers = new ConcurrentHashMap<String,
HTMLParser>(3);
+    private static final Map<String, HTMLParser> parsers = new ConcurrentHashMap<String,
HTMLParser>(4);
 
     public static final String PARSER_CLASSNAME = "htmlParser.className"; // $NON-NLS-1$
 
     public static final String DEFAULT_PARSER =
-        "org.apache.jmeter.protocol.http.parser.HtmlParserHTMLParser"; // $NON-NLS-1$
+        "org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser"; // $NON-NLS-1$
 
     /**
      * Protected constructor to prevent instantiation except from within

Added: jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java
URL: http://svn.apache.org/viewvc/jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java?rev=1529543&view=auto
==============================================================================
--- jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java (added)
+++ jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java Sat Oct  5 22:32:38 2013
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.jmeter.protocol.http.parser;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Iterator;
+
+import jodd.lagarto.EmptyTagVisitor;
+import jodd.lagarto.LagartoParser;
+import jodd.lagarto.Tag;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jmeter.protocol.http.util.ConversionUtils;
+
+/**
+ * Parser based on Lagarto
+ * @since 2.10
+ */
+public class LagartoBasedHtmlParser extends HTMLParser {
+    /**
+     * Mutable holder for a URL. The tag visitor reads the held URL when it
+     * registers a resource and overwrites it when it meets a base tag, so
+     * that later lookups use the updated base.
+     */
+    private static class URLPointer {
+        private URL url;
+
+        private URLPointer(URL initialUrl) {
+            this.url = initialUrl;
+        }
+    }
+    
+    /**
+     * Lagarto tag visitor that collects the URLs of embedded resources
+     * (images, scripts, frames, stylesheets, backgrounds, ...) found in the
+     * visited tags, and tracks changes of the base URL made by base tags.
+     */
+    private static final class JMeterTagVisitor extends EmptyTagVisitor {
+
+        private final URLCollection urls;
+        private final URLPointer baseUrl;
+
+        /**
+         * @param baseUrl holder of the URL against which extracted values are
+         *                resolved; its content is replaced when a base tag
+         *                with a non-empty href is visited
+         * @param urls    collection that receives every extracted URL
+         */
+        public JMeterTagVisitor(final URLPointer baseUrl, URLCollection urls) {
+            this.urls = urls;
+            this.baseUrl = baseUrl;
+        }
+
+        /**
+         * Adds the value of the named attribute to the URL collection,
+         * relative to the current base URL. Missing or empty values are
+         * ignored.
+         *
+         * @param tag           tag to read the attribute from
+         * @param attributeName name of the attribute holding the URL
+         */
+        private void extractAttribute(Tag tag, String attributeName) {
+            String url = tag.getAttributeValue(attributeName, false);
+            if (!StringUtils.isEmpty(url)) {
+                urls.addURL(url, baseUrl.url);
+            }
+        }
+
+        /*
+         * (non-Javadoc)
+         * 
+         * @see jodd.lagarto.EmptyTagVisitor#script(jodd.lagarto.Tag,
+         * java.lang.CharSequence)
+         */
+        @Override
+        public void script(Tag tag, CharSequence body) {
+            extractAttribute(tag, ATT_SRC);
+        }
+
+        /*
+         * (non-Javadoc)
+         * 
+         * @see jodd.lagarto.EmptyTagVisitor#tag(jodd.lagarto.Tag)
+         */
+        @Override
+        public void tag(Tag tag) {
+
+            // Lower-case with a fixed locale: with the default locale a JVM
+            // running in e.g. a Turkish locale maps 'I' to a dotless 'i', so
+            // "IMG" or "IFRAME" would never match the ASCII tag constants.
+            String tagName = tag.getName().toLowerCase(java.util.Locale.ENGLISH);
+            if (tagName.equals(TAG_BODY)) {
+                extractAttribute(tag, ATT_BACKGROUND);
+            } else if (tagName.equals(TAG_BASE)) {
+                String baseref = tag.getAttributeValue(ATT_HREF, false);
+                try {
+                    if (!StringUtils.isEmpty(baseref))// Bugzilla 30713
+                    {
+                        baseUrl.url = ConversionUtils.makeRelativeURL(baseUrl.url, baseref);
+                    }
+                } catch (MalformedURLException e1) {
+                    // Unchecked because the overridden method declares no
+                    // checked exception; the caller of parse() wraps it.
+                    throw new RuntimeException(e1);
+                }
+            } else if (tagName.equals(TAG_IMAGE)) {
+                extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_APPLET)) {
+                extractAttribute(tag, ATT_CODE);
+            } else if (tagName.equals(TAG_OBJECT)) {
+                extractAttribute(tag, ATT_CODEBASE);
+                extractAttribute(tag, ATT_DATA);
+            } else if (tagName.equals(TAG_INPUT)) {
+                // we check the input tag type for image
+                if (ATT_IS_IMAGE.equalsIgnoreCase(tag.getAttributeValue(ATT_TYPE, false))) {
+                    // then we need to download the binary
+                    extractAttribute(tag, ATT_SRC);
+                }
+            } else if (tagName.equals(TAG_SCRIPT)) {
+                extractAttribute(tag, ATT_SRC);
+                // Bug 51750
+            } else if (tagName.equals(TAG_FRAME) || tagName.equals(TAG_IFRAME)) {
+                extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_EMBED)) {
+                extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_BGSOUND)){
+                extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_LINK)) {
+                // Putting the string first means it works even if the attribute is null
+                if (STYLESHEET.equalsIgnoreCase(tag.getAttributeValue(ATT_REL, false))) {
+                    extractAttribute(tag, ATT_HREF);
+                }
+            } else {
+                // Any other tag may still carry a background attribute
+                extractAttribute(tag, ATT_BACKGROUND);
+            }
+
+
+            // Now look for URLs in the STYLE attribute
+            String styleTagStr = tag.getAttributeValue(ATT_STYLE, false);
+            if(styleTagStr != null) {
+                HtmlParsingUtils.extractStyleURLs(baseUrl.url, urls, styleTagStr);
+            }
+        }
+    }
+
+    /**
+     * Decodes the page bytes with the given encoding, runs the text through
+     * a Lagarto parser and collects the embedded resource URLs into
+     * {@code coll}.
+     *
+     * @param html     raw bytes of the HTML page
+     * @param baseUrl  URL used as the initial base for the extracted URLs
+     * @param coll     collection that receives the URLs
+     * @param encoding name of the charset used to decode {@code html}
+     * @return iterator over {@code coll} once parsing has finished
+     * @throws HTMLParseException wrapping any exception raised while
+     *         decoding or parsing
+     */
+    @Override
+    public Iterator<URL> getEmbeddedResourceURLs(byte[] html, URL baseUrl,
+            URLCollection coll, String encoding) throws HTMLParseException {
+        try {
+            final String page = new String(html, encoding);
+            final URLPointer currentBase = new URLPointer(baseUrl);
+            final JMeterTagVisitor visitor = new JMeterTagVisitor(currentBase, coll);
+            new LagartoParser(page).parse(visitor);
+            return coll.iterator();
+        } catch (Exception e) {
+            throw new HTMLParseException(e);
+        }
+    }
+
+    /*
+     * Always answers true. This class declares no instance fields; the
+     * per-parse state lives in the visitor created inside
+     * getEmbeddedResourceURLs.
+     * NOTE(review): presumably this is what allows HTMLParser to keep the
+     * instance in its parser cache - confirm against HTMLParser.
+     *
+     * (non-Javadoc)
+     * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
+     */
+    @Override
+    protected boolean isReusable() {
+        return true;
+    }
+}

Propchange: jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java
URL: http://svn.apache.org/viewvc/jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java?rev=1529543&r1=1529542&r2=1529543&view=diff
==============================================================================
--- jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java (original)
+++ jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java Sat Oct  5 22:32:38 2013
@@ -99,7 +99,8 @@ public class TestHTMLParser extends JMet
         private static final String[] PARSERS = { 
             "org.apache.jmeter.protocol.http.parser.HtmlParserHTMLParser",
             "org.apache.jmeter.protocol.http.parser.JTidyHTMLParser",
-            "org.apache.jmeter.protocol.http.parser.RegexpHTMLParser" 
+            "org.apache.jmeter.protocol.http.parser.RegexpHTMLParser",
+            "org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser"
             };
 
         private static final TestData[] TESTS = new TestData[] {

Modified: jmeter/trunk/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/jmeter/trunk/xdocs/changes.xml?rev=1529543&r1=1529542&r2=1529543&view=diff
==============================================================================
--- jmeter/trunk/xdocs/changes.xml (original)
+++ jmeter/trunk/xdocs/changes.xml Sat Oct  5 22:32:38 2013
@@ -396,6 +396,7 @@ If you use any plugin or third-party cod
 <li><bugzilla>55525</bugzilla> - Proxy should support alias for keyserver entry</li>
 <li><bugzilla>55531</bugzilla> - Proxy recording and redirects. Added code to disable redirected samples.</li>
 <li><bugzilla>55507</bugzilla> - Proxy SSL recording does not handle external embedded resources well</li>
+<li><bugzilla>55632</bugzilla> - Have a new implementation of htmlParser for embedded resources parsing with better performances</li>
 </ul>
 
 <h3>Other samplers</h3>



Mime
View raw message