jmeter-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pmoua...@apache.org
Subject svn commit: r1530074 - in /jmeter/trunk: bin/jmeter.properties src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java
Date Mon, 07 Oct 2013 21:36:47 GMT
Author: pmouawad
Date: Mon Oct  7 21:36:47 2013
New Revision: 1530074

URL: http://svn.apache.org/r1530074
Log:
Bug 55632 - Have a new implementation of htmlParser for embedded resources parsing with better
performances
Bugzilla Id: 55632

Added:
    jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java
  (with props)
Modified:
    jmeter/trunk/bin/jmeter.properties
    jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java

Modified: jmeter/trunk/bin/jmeter.properties
URL: http://svn.apache.org/viewvc/jmeter/trunk/bin/jmeter.properties?rev=1530074&r1=1530073&r2=1530074&view=diff
==============================================================================
--- jmeter/trunk/bin/jmeter.properties (original)
+++ jmeter/trunk/bin/jmeter.properties Mon Oct  7 21:36:47 2013
@@ -585,7 +585,8 @@ HTTPResponse.parsers=htmlParser wmlParse
 # This new parser (since 2.10) should perform better than all others
 # see https://issues.apache.org/bugzilla/show_bug.cgi?id=55632
 #htmlParser.className=org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser
-#
+# This parser is based on JSoup
+#htmlParser.className=org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser
 
 htmlParser.types=text/html application/xhtml+xml application/xml text/xml
 

Added: jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java
URL: http://svn.apache.org/viewvc/jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java?rev=1530074&view=auto
==============================================================================
--- jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java (added)
+++ jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java Mon Oct  7 21:36:47 2013
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.jmeter.protocol.http.parser;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Iterator;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jmeter.protocol.http.util.ConversionUtils;
+import org.apache.jorphan.logging.LoggingManager;
+import org.apache.log.Logger;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.select.NodeTraversor;
+import org.jsoup.select.NodeVisitor;
+
+/**
+ * Parser based on JSOUP
+ * @since 2.10
+ * TODO Factor out common code between {@link LagartoBasedHtmlParser} and this one (adapter pattern)
+ */
+public class JsoupBasedHtmlParser extends HTMLParser {
+    private static final Logger log = LoggingManager.getLoggerForClass();
+
+    /*
+     * A dummy class to pass the pointer of URL.
+     */
+    private static class URLPointer {
+        private URLPointer(URL newUrl) {
+            url = newUrl;
+        }
+        private URL url;
+    }
+    
+    private static final class JMeterNodeVisitor implements NodeVisitor {
+
+        private URLCollection urls;
+        private URLPointer baseUrl;
+
+        /**
+         * @param baseUrl 
+         * @param urls 
+         */
+        public JMeterNodeVisitor(final URLPointer baseUrl, URLCollection urls) {
+            this.urls = urls;
+            this.baseUrl = baseUrl;
+        }
+
+        private final void extractAttribute(Element tag, String attributeName) {
+            String url = tag.attr(attributeName);
+            if (!StringUtils.isEmpty(url)) {
+                urls.addURL(url, baseUrl.url);
+            }
+        }
+
+        @Override
+        public void head(Node node, int depth) {
+        	if (!(node instanceof Element)) {
+        		return;
+        	}
+        	Element tag = (Element) node;
+            String tagName = tag.tagName().toLowerCase();
+            if (tagName.equals(TAG_BODY)) {
+                extractAttribute(tag, ATT_BACKGROUND);
+            } else if (tagName.equals(TAG_SCRIPT)) {
+            	extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_BASE)) {
+                String baseref = tag.attr(ATT_HREF);
+                try {
+                    if (!StringUtils.isEmpty(baseref))// Bugzilla 30713
+                    {
+                        baseUrl.url = ConversionUtils.makeRelativeURL(baseUrl.url, baseref);
+                    }
+                } catch (MalformedURLException e1) {
+                    throw new RuntimeException(e1);
+                }
+            } else if (tagName.equals(TAG_IMAGE)) {
+                extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_APPLET)) {
+                extractAttribute(tag, ATT_CODE);
+            } else if (tagName.equals(TAG_OBJECT)) {
+                extractAttribute(tag, ATT_CODEBASE);                
+                extractAttribute(tag, ATT_DATA);                 
+            } else if (tagName.equals(TAG_INPUT)) {
+                // we check the input tag type for image
+                if (ATT_IS_IMAGE.equalsIgnoreCase(tag.attr(ATT_TYPE))) {
+                    // then we need to download the binary
+                    extractAttribute(tag, ATT_SRC);
+                }
+            } else if (tagName.equals(TAG_SCRIPT)) {
+                extractAttribute(tag, ATT_SRC);
+                // Bug 51750
+            } else if (tagName.equals(TAG_FRAME) || tagName.equals(TAG_IFRAME)) {
+                extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_EMBED)) {
+                extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_BGSOUND)){
+                extractAttribute(tag, ATT_SRC);
+            } else if (tagName.equals(TAG_LINK)) {
+                // Putting the string first means it works even if the attribute is null
+                if (STYLESHEET.equalsIgnoreCase(tag.attr(ATT_REL))) {
+                    extractAttribute(tag, ATT_HREF);
+                }
+            } else {
+                extractAttribute(tag, ATT_BACKGROUND);
+            }
+
+
+            // Now look for URLs in the STYLE attribute
+            String styleTagStr = tag.attr(ATT_STYLE);
+            if(styleTagStr != null) {
+                HtmlParsingUtils.extractStyleURLs(baseUrl.url, urls, styleTagStr);
+            }
+        }
+
+		@Override
+		public void tail(Node arg0, int arg1) {
+			// Noop
+		}
+    }
+
+    @Override
+    public Iterator<URL> getEmbeddedResourceURLs(byte[] html, URL baseUrl,
+            URLCollection coll, String encoding) throws HTMLParseException {
+        try {
+            String contents = new String(html,encoding); 
+            Document doc = Jsoup.parse(contents);
+            JMeterNodeVisitor nodeVisitor = new JMeterNodeVisitor(new URLPointer(baseUrl),
coll);
+            new NodeTraversor(nodeVisitor).traverse(doc);
+            return coll.iterator();
+        } catch (Exception e) {
+            throw new HTMLParseException(e);
+        }
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
+     */
+    @Override
+    protected boolean isReusable() {
+        return true;
+    }
+}

Propchange: jmeter/trunk/src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java
URL: http://svn.apache.org/viewvc/jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java?rev=1530074&r1=1530073&r2=1530074&view=diff
==============================================================================
--- jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java (original)
+++ jmeter/trunk/test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java Mon Oct  7 21:36:47 2013
@@ -100,7 +100,8 @@ public class TestHTMLParser extends JMet
             "org.apache.jmeter.protocol.http.parser.HtmlParserHTMLParser",
             "org.apache.jmeter.protocol.http.parser.JTidyHTMLParser",
             "org.apache.jmeter.protocol.http.parser.RegexpHTMLParser",
-            "org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser"
+            "org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser",
+            "org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser"
             };
 
         private static final TestData[] TESTS = new TestData[] {



Mime
View raw message