portals-jetspeed-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pau...@apache.org
Subject cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java
Date Wed, 07 May 2003 05:38:30 GMT
paulsp      2003/05/06 22:38:30

  Modified:    src/java/org/apache/jetspeed/services/lucene
                        TestLuceneSearch.java LuceneSearchService.java
  Log:
  Added add() method.
  Note:
    Add() will allow duplicates.  To be fixed later.
    Title and description are not parsed out of the document.
  
  Revision  Changes    Path
  1.2       +42 -3     jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/TestLuceneSearch.java
  
  Index: TestLuceneSearch.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/TestLuceneSearch.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TestLuceneSearch.java	7 May 2003 00:17:16 -0000	1.1
  +++ TestLuceneSearch.java	7 May 2003 05:38:29 -0000	1.2
  @@ -54,6 +54,7 @@
   
   package org.apache.jetspeed.services.lucene;
   
  +import java.net.URL;
   // Java imports
   import java.util.HashMap;
   
  @@ -140,6 +141,15 @@
           return new TestSuite( TestLuceneSearch.class );
       }
       
  +    public void testPutWebPage() throws Exception
  +    {
  +        URL jetspeedHomePage = new URL("http://jakarta.apache.org/jetspeed");
  +        assertNotNull("Created URL to Jetspeed Home Page",  jetspeedHomePage);
  +        assertTrue("Adding to index", LuceneSearch.add(jetspeedHomePage));
  +        assertTrue("Adding to index", LuceneSearch.add(new URL("http://www.google.com")));
  +        assertTrue("Adding to index", LuceneSearch.add(new URL("http://jakarta.apache.org")));
  +    }
  +
       /**
        *
        * @throws Exception
  @@ -147,8 +157,23 @@
       public void testVerifyJetspeedSearch() throws Exception
       {
           SearchResult result = null;
  -        SearchResults results  = LuceneSearch.search( "+jetspeed +overview");
  -        System.out.println("hits = " + results.size());
  +        SearchResults results  = LuceneSearch.search( "Jetspeed");
  +        System.out.println("Query 'Jetspeed' hits = " + results.size());
  +        for (int i = 0; i < results.size(); i++)
  +        {
  +            result = results.get(i);
  +            System.out.println("Score = " + result.getScore());
  +            System.out.println("title = " + result.getTitle());
  +            System.out.println("summary = " + result.getDescription());
  +            System.out.println("url = " + result.getDocumentURL());
  +        }
  +    }
  +
  +    public void testVerifyJetspeedSearch1() throws Exception
  +    {
  +        SearchResult result = null;
  +        SearchResults results  = LuceneSearch.search( "Jetspeed Lucene");
  +        System.out.println("Query 'Jetspeed Lucene' hits = " + results.size());
           for (int i = 0; i < results.size(); i++)
           {
               result = results.get(i);
  @@ -159,4 +184,18 @@
           }
       }
   
  +    public void testVerifyJetspeedSearch2() throws Exception
  +    {
  +        SearchResult result = null;
  +        SearchResults results  = LuceneSearch.search( "google");
  +        System.out.println("Query 'goggle' hits = " + results.size());
  +        for (int i = 0; i < results.size(); i++)
  +        {
  +            result = results.get(i);
  +            System.out.println("Score = " + result.getScore());
  +            System.out.println("title = " + result.getTitle());
  +            System.out.println("summary = " + result.getDescription());
  +            System.out.println("url = " + result.getDocumentURL());
  +        }
  +    }
   }
  
  
  
  1.2       +114 -17   jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/LuceneSearchService.java
  
  Index: LuceneSearchService.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/LuceneSearchService.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LuceneSearchService.java	7 May 2003 00:17:16 -0000	1.1
  +++ LuceneSearchService.java	7 May 2003 05:38:29 -0000	1.2
  @@ -59,6 +59,10 @@
   import java.io.IOException;
   import java.net.URL;
   import javax.servlet.ServletConfig;
  +import org.apache.commons.httpclient.HttpClient;
  +import org.apache.commons.httpclient.HttpException;
  +import org.apache.commons.httpclient.HttpMethod;
  +import org.apache.commons.httpclient.methods.GetMethod;
   
   // Jetspeed imports
   
  @@ -75,6 +79,7 @@
   import org.apache.lucene.analysis.Analyzer;
   import org.apache.lucene.analysis.standard.StandardAnalyzer;
   import org.apache.lucene.document.Document;
  +import org.apache.lucene.document.Field;
   import org.apache.lucene.index.IndexWriter;
   import org.apache.lucene.queryParser.QueryParser;
   import org.apache.lucene.search.IndexSearcher;
  @@ -87,12 +92,19 @@
       
       public static final String SERVICE_NAME = "LuceneSearch";
       private static final String CONFIG_DIRECTORY = "directory";
  +    private static final String CONFIG_CONTENT_FIELDNAME = "fieldname.content";
  +    private static final String CONFIG_CONTENT_FIELDNAME_DEFAULT = "Content";
       private static final String CONFIG_DESCRIPTION_FIELDNAME = "fieldname.description";
  +    private static final String CONFIG_DESCRIPTION_FIELDNAME_DEFAULT = "Description";
       private static final String CONFIG_TITLE_FIELDNAME = "fieldname.title";
  +    private static final String CONFIG_TITLE_FIELDNAME_DEFAULT = "Title";
       private static final String CONFIG_URL_FIELDNAME = "fieldname.url";
  +    private static final String CONFIG_URL_FIELDNAME_DEFAULT = "URL";
  +    private String contentFieldName = null;
       private String descriptionFieldName = null;
  +    private File rootDir = null;
       private String indexRoot = null;
  -    private Searcher searcher = null;
  +    //    private Searcher searcher = null;
       private String titleFieldName = null;
       private String urlFieldName  = null;
       
  @@ -172,16 +184,17 @@
           .getResources(LuceneSearchService.SERVICE_NAME);
           
           // Get config properties
  -        descriptionFieldName  = serviceConf.getString( CONFIG_DESCRIPTION_FIELDNAME);
  -        titleFieldName  = serviceConf.getString( CONFIG_TITLE_FIELDNAME);
  -        urlFieldName  = serviceConf.getString( CONFIG_URL_FIELDNAME);
  +        contentFieldName  = serviceConf.getString( CONFIG_CONTENT_FIELDNAME, CONFIG_CONTENT_FIELDNAME_DEFAULT);
  +        descriptionFieldName  = serviceConf.getString(CONFIG_DESCRIPTION_FIELDNAME, CONFIG_DESCRIPTION_FIELDNAME_DEFAULT);
  +        titleFieldName  = serviceConf.getString(CONFIG_TITLE_FIELDNAME, CONFIG_TITLE_FIELDNAME_DEFAULT);
  +        urlFieldName  = serviceConf.getString(CONFIG_URL_FIELDNAME, CONFIG_URL_FIELDNAME_DEFAULT);
           indexRoot = serviceConf.getString( CONFIG_DIRECTORY);
           
           //
           // The following section opens or creates the search index
           //
           //
  -        File rootDir = new File(indexRoot);
  +        rootDir = new File(indexRoot);
           
           //If the rootDir does not exist, treat it as context relative
           if (!rootDir.exists())
  @@ -200,7 +213,9 @@
           
           try
           {
  +            Searcher searcher = null;
               searcher = new IndexSearcher( rootDir.getPath());
  +            searcher.close();
           }
           catch (Exception e)
           {
  @@ -211,7 +226,6 @@
                   indexWriter.close();
                   indexWriter = null;
                   Log.info("Created Lucene Index in " + rootDir.getPath());
  -                searcher = new IndexSearcher(rootDir.getPath());
               }
               catch (Exception e1)
               {
  @@ -230,21 +244,23 @@
        *
        * @param searchString is the what is being searched for
        * @return Hits, if no hits then null.
  +     *
  +     * @task Parse content into title and description fields
        */
       public SearchResults search(String searchString)
       {
  +        Searcher searcher = null;
           Hits hits = null;
           try
           {
  +            searcher = new IndexSearcher(rootDir.getPath());
               Analyzer analyzer = new StandardAnalyzer();
  -            
  -            Query query = QueryParser.parse(searchString, "contents", analyzer);
  -            System.out.println("Searching for: " + query.toString("contents"));
  -            
  +            Query query = QueryParser.parse(searchString, this.contentFieldName, analyzer);
               hits = searcher.search(query);
           }
           catch (Exception e)
           {
  +            e.printStackTrace();
               Log.error(e);
           }
           
  @@ -259,22 +275,33 @@
               {
                   doc = hits.doc(counter);
                   result.setScore(hits.score(counter));
  -                result.setDescription(doc.getField(this.descriptionFieldName).toString());
  -                result.setTitle(doc.getField(this.titleFieldName).toString());
  -                result.setDocumentURL(doc.getField(this.urlFieldName).toString());
  +                //result.setDescription(doc.getField(this.descriptionFieldName).toString());
  +                //result.setTitle(doc.getField(this.titleFieldName).toString());
  +                result.setDocumentURL(doc.getField(this.urlFieldName).stringValue());
                   results.add(counter, result);
               }
               catch (IOException ioe)
               {
                   Log.error(ioe);
  -                throw new Error("Error retrieving search results", ioe);
               }
               result = null;
           }
  +        
  +        if (searcher != null)
  +        {
  +            try
  +            {
  +                searcher.close();
  +            }
  +            catch (IOException ioe)
  +            {
  +                Log.error("Closing Searcher", ioe);
  +            }
  +        }
           return results;
       }
  -
  -    /** 
  +    
  +    /**
        * Add a page to be indexed.
        *
        * @return true is page added sucessfully added.
  @@ -282,7 +309,77 @@
        */
       public boolean add(URL pageToAdd)
       {
  -        return false;
  +        HttpClient client = new HttpClient();
  +        client.startSession(pageToAdd);
  +        GetMethod method = new GetMethod("/");
  +        method.setFollowRedirects(true);
  +        int statusCode = -1;
  +        int attempt = 0;
  +        
  +        // We will retry up to 3 times.
  +        while (statusCode == -1 && attempt < 3)
  +        {
  +            try
  +            {
  +                // execute the method.
  +                client.executeMethod(method);
  +                statusCode = method.getStatusCode();
  +                System.out.println("Status code = " + statusCode);
  +            }
  +            catch (HttpException e)
  +            {
  +                // We will retry
  +            }
  +            catch (IOException e)
  +            {
  +                return false;
  +            }
  +        }
  +        // Check that we didn't run out of retries.
  +        if (statusCode != -1)
  +        {
  +            String content = null;
  +            try
  +            {
  +                content = method.getDataAsString();
  +            }
  +            catch (IOException ioe)
  +            {
  +                Log.error("Getting content for " + pageToAdd.toString(), ioe);
  +            }
  +            
  +            if (content != null)
  +            {
  +                try
  +                {
  +                    Document doc = new Document();
  +                    doc.add(Field.Text(this.urlFieldName, pageToAdd.toString()));
  +                    doc.add(Field.Text(this.contentFieldName, content));
  +                    IndexWriter indexWriter = new IndexWriter( rootDir, new StandardAnalyzer(),
false);
  +                    indexWriter.addDocument(doc);
  +                    System.out.println("Index Docuemnt Count = " + indexWriter.docCount());
  +                    indexWriter.optimize();
  +                    indexWriter.close();
  +                    Log.info("Added '" + pageToAdd.toString() + "' to index");
  +                }
  +                catch (Exception e)
  +                {
  +                    e.printStackTrace();
  +                    Log.error("Adding document to index", e);
  +                }
  +            }
  +        }
  +        try
  +        {
  +            client.endSession();
  +        }
  +        catch (IOException ioe)
  +        {
  +            ioe.printStackTrace();
  +            Log.error("Ending session to " + pageToAdd.toString(), ioe);
  +        }
  +        
  +        return (statusCode == 200);
       }
       
       public String[] getSearchSets()
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org


Mime
View raw message