Monday, March 31, 2008

Getting started on search

Spent an hour tonight playing with Lucene.Net. I've got a really simple command-line version of search going. I also think I came up with a good idea for queueing indexes. Hopefully this weekend I'll have time to hack on that. For now, though, search seems like an easier problem than I expected.

Here's my really simple indexer and searcher:



using System;
using System.Configuration;
using System.Collections.Generic;

using Lucene.Net.Index;
using Lucene.Net.Documents;
using Lucene.Net.Analysis.Standard;

using MySql.Data.MySqlClient;


namespace Grurrah.Tools {

    /// <summary>
    /// Command line tool that rebuilds the Lucene index stored in the "Indexes"
    /// directory from the rows returned by <see cref="QueryString"/>.
    /// </summary>
    public class Indexer {

        // Ordinals of the columns in the SELECT list of QueryString. Fields are read
        // by ordinal because the result set contains two columns called "name"
        // (Grurrahs.name and Tags.name), which makes lookups by name ambiguous.
        private const int GrurrahIdColumn = 0;
        private const int GrurrahNameColumn = 1;
        private const int EntryTextColumn = 2;
        private const int TagNameColumn = 3;
        private const int RatingAvgColumn = 4;

        // NOTE(review): the query groups by (Grurrahs.id, Tags.name), so it returns one
        // row per tag; a grurrah with several tags is therefore added to the index
        // several times and the tag name itself is never indexed. Confirm whether
        // that is intended.
        private static readonly string QueryString = "SELECT Grurrahs.id, Grurrahs.name, Entries.text, Tags.name, AVG(user_grurrah_ratings.rating) FROM Grurrahs " +
            "RIGHT JOIN Entries ON Grurrahs.id = Entries.grurrah_id AND Grurrahs.entry_index = Entries.entry_index " +
            "LEFT JOIN tag_map ON Grurrahs.id = tag_map.grurrah_id LEFT JOIN Tags on tag_map.tag_id = Tags.id " +
            "LEFT JOIN user_grurrah_ratings ON Grurrahs.id = user_grurrah_ratings.grurrah_id GROUP BY Grurrahs.id, Tags.name";

        // Database resources opened by LoadData and released by CloseData.
        private static MySqlConnection connection;
        private static MySqlCommand command;
        private static MySqlDataReader reader;

        /// <summary>
        /// Entry point: runs the query, indexes every row and always releases the
        /// database resources afterwards, even when indexing fails.
        /// </summary>
        public static void Main ()
        {
            try {
                LoadData ();
                IndexData ();
            } finally {
                CloseData ();
            }
        }

        /// <summary>
        /// Opens the connection named "Read" from the application configuration and
        /// starts executing <see cref="QueryString"/>.
        /// </summary>
        /// <exception cref="ConfigurationErrorsException">
        /// The "Read" connection string is not configured.
        /// </exception>
        private static void LoadData ()
        {
            ConnectionStringSettings cs = ConfigurationManager.ConnectionStrings ["Read"];
            if (cs == null)
                throw new ConfigurationErrorsException ("The connection string \"Read\" is not configured.");

            connection = new MySqlConnection (cs.ConnectionString);
            connection.Open ();

            command = new MySqlCommand (QueryString, connection);
            reader = command.ExecuteReader ();
        }

        /// <summary>
        /// Closes the reader, command and connection opened by <see cref="LoadData"/>.
        /// Safe to call when some or all of them were never opened.
        /// </summary>
        private static void CloseData ()
        {
            if (reader != null) {
                reader.Close ();
                reader = null;
            }

            if (command != null) {
                command.Dispose ();
                command = null;
            }

            if (connection != null) {
                connection.Close ();
                connection = null;
            }
        }

        /// <summary>
        /// Recreates the index in the "Indexes" directory and adds one document per
        /// row that has a grurrah name.
        /// </summary>
        private static void IndexData ()
        {
            IndexWriter writer = new IndexWriter ("Indexes", new StandardAnalyzer (), true);

            try {
                while (reader.Read ()) {
                    // The RIGHT JOIN yields entries without a matching grurrah; skip them.
                    if (reader.IsDBNull (GrurrahNameColumn))
                        continue;
                    writer.AddDocument (IndexGrurrah ());
                }

                writer.Optimize ();
            } finally {
                // Always close the writer so the index write lock is released.
                writer.Close ();
            }
        }

        /// <summary>
        /// Builds a Lucene document from the row the reader is currently positioned on.
        /// </summary>
        /// <returns>
        /// A document with stored, tokenized "name" and "text" fields, boosted by the
        /// average rating when one is available.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// No query has been executed yet, so there is no current row.
        /// </exception>
        public static Document IndexGrurrah ()
        {
            if (reader == null)
                throw new InvalidOperationException ("No data has been loaded; there is no current row to index.");

            Document doc = new Document ();

            if (!reader.IsDBNull (RatingAvgColumn)) {
                // AVG() is fractional, so convert to float rather than truncating to int.
                float rating = Convert.ToSingle (reader.GetValue (RatingAvgColumn));

                // A boost of zero (or less) would zero out the score and hide the
                // document from every search, so only positive ratings are applied.
                if (rating > 0f)
                    doc.SetBoost (rating);
            }

            doc.Add (new Field ("name", GetStringOrEmpty (GrurrahNameColumn), Field.Store.YES, Field.Index.TOKENIZED));
            doc.Add (new Field ("text", GetStringOrEmpty (EntryTextColumn), Field.Store.YES, Field.Index.TOKENIZED));

            return doc;
        }

        // Returns the string in the given column of the current row, or "" when it is NULL.
        private static string GetStringOrEmpty (int column)
        {
            return reader.IsDBNull (column) ? String.Empty : reader.GetString (column);
        }
    }
}




using System;
using Lucene.Net.Search;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Analysis.Standard;



namespace Grurrah.Tools {

    /// <summary>
    /// Command line tool that runs a query against the Lucene index in the
    /// "Indexes/" directory and prints every hit.
    /// </summary>
    public class Search {

        /// <summary>
        /// Parses the command line as a Lucene query over the "text" field and writes
        /// one "name: text -- score" line per hit to standard output.
        /// </summary>
        /// <param name="args">
        /// The query. Multiple arguments are joined with spaces, so an unquoted
        /// multi-word query is searched as a whole instead of only its first word.
        /// </param>
        public static void Main (string [] args)
        {
            if (args.Length < 1) {
                Console.Error.WriteLine ("You must supply a query parameter.");
                return;
            }

            string queryText = String.Join (" ", args);

            // Parse before opening the index so a malformed query costs nothing
            // and is reported as a message rather than an unhandled exception.
            Query query;
            try {
                QueryParser parser = new QueryParser ("text", new StandardAnalyzer ());
                query = parser.Parse (queryText);
            } catch (ParseException e) {
                Console.Error.WriteLine ("Invalid query: {0}", e.Message);
                return;
            }

            IndexSearcher searcher = new IndexSearcher ("Indexes/");

            try {
                Hits hits = searcher.Search (query);
                int count = hits.Length ();

                for (int i = 0; i < count; i++) {
                    Document doc = hits.Doc (i);
                    Console.WriteLine ("{0}: {1} -- {2}", doc.Get ("name"), doc.Get ("text"), hits.Score (i));
                }
            } finally {
                // Release the index files even when searching or printing fails.
                searcher.Close ();
            }
        }
    }
}

No comments: