mirror of
https://github.com/django/django.git
synced 2025-07-05 02:09:13 +00:00
[search-api] Initial commit, Lucene working, Xapian and Hype almost working, needs polish.
git-svn-id: http://code.djangoproject.com/svn/django/branches/search-api@3636 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
682aed446b
commit
91790e27cd
0
django/contrib/search/__init__.py
Normal file
0
django/contrib/search/__init__.py
Normal file
19
django/contrib/search/backends.py
Normal file
19
django/contrib/search/backends.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
from default import DefaultIndexer
|
||||||
|
|
||||||
|
try:
|
||||||
|
from xapian import XapianIndexer
|
||||||
|
except ImportError:
|
||||||
|
print "Xapian backend will not be available due to an ImportError. " \
|
||||||
|
"Do you have Xapian and Xapwrap installed?"
|
||||||
|
|
||||||
|
try:
|
||||||
|
from lucene import LuceneIndexer
|
||||||
|
except ImportError:
|
||||||
|
print "Lucene backend will not be available due to an ImportError. " \
|
||||||
|
"Do you have Lucene and PyLucene installed?"
|
||||||
|
|
||||||
|
try:
|
||||||
|
from hype import HypeIndexer
|
||||||
|
except ImportError:
|
||||||
|
print "Hyper Estraier backend will not be available due to an importError. " \
|
||||||
|
"Do you have Hyper Estraier and Hype installed?"
|
214
django/contrib/search/base.py
Normal file
214
django/contrib/search/base.py
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
from django.db import models
|
||||||
|
from django.core.exceptions import ObjectDoesNotExist
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Python 2.3 has no builtin ``set``; fall back to the ``sets`` module there.
# Probe the name directly instead of using hasattr(__builtins__, 'set'):
# ``__builtins__`` is a module in ``__main__`` but a plain dict inside imported
# modules, so the hasattr test reports False even when ``set`` exists.
try:
    set
except NameError:
    from sets import Set as set
|
||||||
|
|
||||||
|
# FIXME: Methods that accept a field parameter claim to accept Field instances
|
||||||
|
# or strings giving the object path. However, since there is no Field
|
||||||
|
# attribute giving the Model it is bound to, these methods only work for
|
||||||
|
# strings at the moment. This doesn't really affect the ease of use of the
|
||||||
|
# library, as strings are actually easier to use.
|
||||||
|
|
||||||
|
def str_to_field(string, namespace=None):
    """Resolve the dotted object path `string` to a model Field.

    Example: 'Person.first_name' -> Person._meta.get_field('first_name')

    The first path component is looked up in `namespace` (a dict-like
    object).  Each later component is looked up on the object found so far:
    through `_meta.get_field` for models and ForeignKeys (a ForeignKey hops
    to its related model), otherwise through item or attribute access.  A
    path that ends on a model resolves to that model's primary key.

    If `namespace` is None the *caller's* globals are used, so that e.g.
    'models.Person.first_name' refers to the caller's `models` module and
    not to the django.db.models module imported here.

    Raises ValueError if the path does not end on a Field.  Lookup errors
    raised along the way (KeyError, AttributeError, FieldDoesNotExist) are
    deliberately left to propagate, as they are already descriptive.
    """
    if namespace is None:
        # FIXME: This uses the sys._getframe hack to get the caller's namespace.
        # It must stay in this function: the frame depth is relative to it.
        current = sys._getframe(1).f_globals
    else:
        current = namespace
    lookup = current.__getitem__
    owner = None

    for part in string.split('.'):
        current = lookup(part)

        # Decide how the *next* component will be looked up.
        if isinstance(current, models.base.ModelBase):
            owner = current
            lookup = owner._meta.get_field
        elif isinstance(current, models.fields.related.ForeignKey):
            owner = current.rel.to
            lookup = owner._meta.get_field
        # TODO: The rest of these could be more type-smart...
        elif hasattr(current, '__getitem__'):
            lookup = current.__getitem__
        elif hasattr(current, '__getattribute__'):
            lookup = current.__getattribute__
        else:
            lookup = current.__getattr__

    # A path naming a model itself stands for that model's primary key.
    if isinstance(current, models.base.ModelBase):
        owner = current
        current = current._meta.pk

    if not isinstance(current, models.Field):
        raise ValueError("%r is not a Field object! (%r -> %r)" % \
                         (part, string, current))

    # FIXME: A Field has no reference back to its Model, so one is attached
    # here for later use. Hopefully setting this attribute won't mess
    # anything up...
    current._model = owner
    return current
|
||||||
|
|
||||||
|
|
||||||
|
class Indexer(object):
    """Backend-independent base class for search indexers.

    Keeps track of which model fields are indexed: `text_fields` is a set of
    unnamed fields, `attr_fields` maps a name to a field.  Backends subclass
    this and implement `search`, `index` and `update`.
    """

    def __init__(self, path, model, fields=None, attributes=None, namespace=None, **kwargs):
        """Initialize an Indexer whose index data is stored at `path`.

        `model` is the Model (or a dotted object path string naming the
        model, looked up in `namespace`) whose instances will be used as
        documents. Note that fields from other models can still be used in
        the index, but this model will be the one returned from search
        results.

        `fields` may be optionally initialized as an iterable of unnamed Fields.
        `attributes` may be optionally initialized as a mapping of field names
        to Fields. Extra keyword arguments are treated as further named
        fields; on a name clash `attributes` wins.

        `namespace` is the dict-like object in which fields passed as object
        paths will be searched. If None, the caller's global namespace will be
        used, thanks to the sys._getframe hack.

        Example: If `fields` is ['models.Person.first_name'], it is important
        that namespace['models'] refers to the intended module and NOT the
        django.db.models module imported here.

        The model's primary key is always indexed: if it was not passed in
        explicitly it is added to `attr_fields` under the name 'pk'.
        """
        if fields is None:
            fields = []
        if attributes is None:
            attributes = kwargs
        else:
            # `attributes` should take precedence to `kwargs`.
            kwargs.update(attributes)
            attributes = kwargs

        if namespace is None:
            # FIXME: This uses the sys._getframe hack to get the caller's namespace.
            namespace = sys._getframe(1).f_globals

        # The docstring promises that `model` may be given as a string, but
        # everything below needs the class itself (model._meta.pk), so
        # resolve the dotted path against the namespace first.
        if isinstance(model, basestring):
            parts = model.split('.')
            resolved = namespace[parts[0]]
            for part in parts[1:]:
                resolved = getattr(resolved, part)
            model = resolved

        self._prepare_path(path)

        self.path = path
        self.model = model
        self.text_fields = set([])
        self.attr_fields = {}

        for field in fields:
            self.add_field(field, namespace=namespace)

        for name, field in attributes.iteritems():
            self.add_field(field, name, namespace=namespace)

        pk = self.model._meta.pk
        pk._model = self.model
        if pk not in self.text_fields and pk not in set(self.attr_fields.values()):
            self.add_field(pk, 'pk', namespace=namespace)

    def add_field(self, field, name=None, namespace=None):
        """Add the given field to the Indexer, where `field` is either
        an object path string or a Field instance. If `name` is None,
        the field will be added to self.text_fields, otherwise it will be
        added to self.attr_fields with the given name.
        `namespace` has the same meaning as in __init__.
        """
        # FIXME: This uses the sys._getframe hack to get the caller's namespace.
        if namespace is None:
            namespace = sys._getframe(1).f_globals

        # FIXME: Detect duplicates, or user-knows-best?
        if isinstance(field, basestring):
            field = str_to_field(field, namespace)

        if name:
            self.attr_fields[name] = field
        else:
            self.text_fields.add(field)

    def remove_field(self, field=None, name=None, find_name=True, namespace=None):
        """Remove the given field from the Indexer, where `field` is either
        an object path string or a Field instance. If `name` is given,
        the field with that name is removed. If both `field` and `name`
        are given, both are removed if they refer to different fields.
        If `find_name` is True, the named fields in self.attr_fields are
        searched for `field`, otherwise only self.text_fields is searched.
        `namespace` has the same meaning as in __init__.
        """
        # FIXME: This uses the sys._getframe hack to get the caller's namespace.
        if namespace is None:
            namespace = sys._getframe(1).f_globals

        # No early return here: when both `name` and `field` are given, the
        # documented contract is that both get removed.
        if name and name in self.attr_fields:
            del self.attr_fields[name]

        if field:
            if isinstance(field, basestring):
                field = str_to_field(field, namespace)

            self.text_fields.discard(field)

            if find_name:
                # Iterate over a snapshot because entries are deleted inside
                # the loop.
                for attr_name, attr_field in list(self.attr_fields.items()):
                    # TODO: Make sure identity is correct here
                    if attr_field is field:
                        del self.attr_fields[attr_name]

    def search(self, query_string, sortBy=None):
        """Query the index for `query_string` and return the matching hits.

        `sortBy` selects the ordering of the results; its accepted values are
        defined by the backend that implements this method.

        Must be implemented by backends; the base class raises
        NotImplementedError.
        """
        raise NotImplementedError

    def index(self, document):
        """Add a single `document` to the index. Backends must implement this."""
        raise NotImplementedError

    def update(self, force=False):
        """Bring the index up to date. Backends must implement this."""
        raise NotImplementedError

    def _prepare_path(self, path):
        """Hook called from __init__ before any attribute is set; backends
        may override it to open or create their storage at `path`.
        The default does nothing.
        """
        pass
|
||||||
|
|
||||||
|
def test_indexer():
|
||||||
|
# Note: I'm not very good at writing tests.
|
||||||
|
|
||||||
|
class Person(models.Model):
|
||||||
|
first_name = models.CharField(maxlength=30)
|
||||||
|
last_name = models.CharField(maxlength=30)
|
||||||
|
description = models.TextField()
|
||||||
|
|
||||||
|
i = Indexer('', Person, ['Person.description'], {'first': 'Person.first_name'},
|
||||||
|
last='Person.last_name', namespace=locals())
|
||||||
|
|
||||||
|
assert Person._meta.get_field('description') in i.text_fields
|
||||||
|
assert set([Person._meta.get_field('first_name'),
|
||||||
|
Person._meta.get_field('last_name')]) == \
|
||||||
|
set(i.attr_fields.values())
|
||||||
|
assert 'first' in i.attr_fields and 'last' in i.attr_fields
|
||||||
|
|
||||||
|
i.remove_field('Person.description', namespace=locals())
|
||||||
|
assert not i.text_fields
|
||||||
|
|
||||||
|
i.remove_field(name='last')
|
||||||
|
assert 'last' not in i.attr_fields
|
||||||
|
print "Test succeeded."
|
||||||
|
return i
|
9
django/contrib/search/default.py
Normal file
9
django/contrib/search/default.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from base import Indexer
|
||||||
|
|
||||||
|
# This is the future home of a pure-Python text indexer.
|
||||||
|
|
||||||
|
# Alec Thomas has created a built-in indexer for his library here:
|
||||||
|
# http://swapoff.org/wiki/pyndexter
|
||||||
|
|
||||||
|
class DefaultIndexer(Indexer):
    """Placeholder for the future pure-Python indexer.

    Adds nothing to Indexer yet, so search/index/update still raise
    NotImplementedError.
    """
|
35
django/contrib/search/hype.py
Normal file
35
django/contrib/search/hype.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
from base import Indexer
|
||||||
|
from query import ResultSet, Hit
|
||||||
|
|
||||||
|
import hype
|
||||||
|
|
||||||
|
# TODO: This is very incomplete.
|
||||||
|
|
||||||
|
class HypeIndexer(Indexer):
    """Indexer backend on top of Hyper Estraier via the `hype` binding.

    TODO: This is very incomplete.
    """

    def __init__(self, *args, **kwargs):
        """Set up the field bookkeeping, then open (creating if needed) the
        Hyper Estraier database at `self.path`.

        Accepts the same arguments as Indexer.__init__.
        """
        import sys
        # Capture the caller's globals here (as LuceneIndexer does), because
        # inside Indexer.__init__ "the caller" would be this method.
        # Positional layout after self: path, model, fields, attributes,
        # namespace -- only fill it in when the caller did not supply one.
        if len(args) < 5 and kwargs.get('namespace') is None:
            # FIXME: This uses the sys._getframe hack to get the caller's namespace.
            kwargs['namespace'] = sys._getframe(1).f_globals
        # super() must be given *this* class; super(Indexer, self) skips
        # Indexer.__init__ entirely and leaves self.path unset.
        super(HypeIndexer, self).__init__(*args, **kwargs)
        self.db = hype.Database(self.path, hype.ESTDBWRITER | hype.ESTDBCREAT)

    def index(self, row):
        """Build a document for the model instance `row`.

        NOTE(review): unfinished -- no text is passed to add_text() and the
        document is never stored in self.db; confirm the intended hype calls
        before relying on this.
        """
        document = hype.Document()
        document['@pk'] = row._get_pk_val()
        document.add_text()

    def search(self, query_string, sortBy=None):
        """Run `query_string` against the database and wrap the result in a
        HypeResultSet. `sortBy` is currently ignored.
        """
        searcher = self.db.search(query_string)
        return HypeResultSet(searcher, self)

    def close(self):
        """Close the underlying database."""
        self.db.close()


class HypeResultSet(ResultSet):
    """Result set wrapping the object returned by the hype database search."""

    def __init__(self, hits, indexer):
        # __len__ and __iter__ below read these two attributes; without this
        # constructor they were never set.
        self._hits = hits
        self._indexer = indexer

    def __len__(self):
        return len(self._hits)

    def __iter__(self):
        for hit in self._hits:
            yield HypeHit(hit, self._indexer)


class HypeHit(Hit):
    """A single Hyper Estraier hit.

    TODO: get_pk and score are not implemented for this backend yet.
    """
    pass
|
162
django/contrib/search/lucene.py
Normal file
162
django/contrib/search/lucene.py
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
from base import Indexer
|
||||||
|
from query import ResultSet, Hit
|
||||||
|
from itertools import imap
|
||||||
|
import os, sys
|
||||||
|
|
||||||
|
import PyLucene
|
||||||
|
|
||||||
|
# WARNING!*
|
||||||
|
# PyLucene wants you to use PyLucene.PythonThread for threading.
|
||||||
|
# Look at samples/ThreadIndexFiles.py bundled with PyLucene.
|
||||||
|
# * I'm not sure how important this is.
|
||||||
|
|
||||||
|
# TODO: Make Lucene aware of field types.
|
||||||
|
|
||||||
|
# Here's how to use me:
|
||||||
|
#
|
||||||
|
# class Person(models.Model):
|
||||||
|
# first_name = models.CharField(maxlength=30)
|
||||||
|
# last_name = models.CharField(maxlength=30)
|
||||||
|
# biography = models.TextField()
|
||||||
|
#
|
||||||
|
#   indexer = LuceneIndexer('/tmp/lucene-index', Person, ['Person.biography'],
|
||||||
|
# {'first': 'Person.first_name',
|
||||||
|
# 'last': 'Person.last_name'})
|
||||||
|
# indexer.update() # Note, calling this multiple times without clearing old
|
||||||
|
# # entries will cause duplicates in the index.
|
||||||
|
# indexer.search("brian -last:beck")
|
||||||
|
|
||||||
|
class LuceneIndexer(Indexer):
    """Indexer backend that stores its index through PyLucene.

    Unnamed text fields are joined into one Lucene field called 'contents'
    (the default search field); named fields are stored under their own name.
    """

    def __init__(self, *args, **kwargs):
        """Accepts the same arguments as Indexer.__init__."""
        # Capture the caller's globals here, because inside Indexer.__init__
        # "the caller" would be this method. Only do so when the caller did
        # not pass a namespace (positional layout after self: path, model,
        # fields, attributes, namespace); previously an explicit namespace
        # was silently overwritten.
        if len(args) < 5 and kwargs.get('namespace') is None:
            # FIXME: This uses the sys._getframe hack to get the caller's namespace.
            kwargs['namespace'] = sys._getframe(1).f_globals
        super(LuceneIndexer, self).__init__(*args, **kwargs)
        self.writer_closed = True

    def _prepare_path(self, path):
        """Open the Lucene directory abstraction for `path`."""
        # Lucene wants an abstraction of the directory.
        # Should look into storage in a Model-compatible database in the future...
        self._store = PyLucene.FSDirectory.getDirectory(path, True)

    def update(self, documents=None):
        """(Re)index `documents`, or every instance of the model if None.

        Each document is deleted from the index before being indexed again.
        If no writer was open on entry, one is opened for the duration of the
        call and closed afterwards, even when indexing fails.
        """
        close = False
        if self.writer_closed:
            close = True
            self.open_writer()

        try:
            if documents is None:
                update_queue = self.model.objects.all()
            else:
                update_queue = documents

            for document in update_queue:
                self.delete(document)
                self.index(document)
        finally:
            if close:
                self.close_writer()

    def clear(self):
        """Delete every document the writer currently reports."""
        close = False
        if self.writer_closed:
            close = True
            self.open_writer()
        try:
            for i in xrange(self._writer.docCount()):
                self._writer.deleteDocument(i)
        finally:
            if close:
                self.close_writer()

    def delete(self, row):
        """Remove the documents whose 'pk' term equals the primary key of `row`."""
        reader = PyLucene.IndexReader.open(self.path)
        try:
            reader.deleteDocuments(PyLucene.Term('pk', str(row._get_pk_val())))
        finally:
            reader.close()

    def open_writer(self):
        """Open an IndexWriter on the store and mark the writer as open."""
        self.writer_closed = False
        self._writer = PyLucene.IndexWriter(self._store, PyLucene.StandardAnalyzer(), True)
        self._writer.setMaxFieldLength(1048576) # Max number of tokens stored per field?

    def close_writer(self):
        """Optimize and close the writer and mark it as closed."""
        self._writer.optimize()
        self._writer.close()
        self.writer_closed = True

    def index(self, row):
        """Add the model instance `row` to the index as one Lucene document.

        If no writer was open on entry, one is opened for the duration of the
        call and closed afterwards, even when indexing fails.
        """
        close = False
        if self.writer_closed:
            close = True
            self.open_writer()

        try:
            document = PyLucene.Document()

            for name, field in self.attr_fields.iteritems():
                # FIXME: Assumes no Foreign Keys! Lame!
                value = getattr(row, field.name)
                document.add(PyLucene.Field(name, str(value),
                                            PyLucene.Field.Store.YES,
                                            PyLucene.Field.Index.TOKENIZED))
            # Lucene only seems to support one 'default' field.
            # However, we might want multiple fields to be searched
            # by default. Hopefully just joining their contents with
            # newlines solves this.
            contents = '\n'.join([str(getattr(row, field.name)) for field in \
                                  self.text_fields])
            # FIXME: Hardcoded 'contents' field.
            document.add(PyLucene.Field('contents', contents,
                                        PyLucene.Field.Store.YES,
                                        PyLucene.Field.Index.TOKENIZED))
            self._writer.addDocument(document)
        finally:
            if close:
                self.close_writer()

    def search(self, query_string, default_field='contents', order_by='RELEVANCE'):
        """Parse `query_string` and return a LuceneResultSet of the hits.

        `default_field` is the field searched by terms without a prefix.
        `order_by` is 'SCORE', 'INDEX', 'RELEVANCE', or a field name with an
        optional leading '-' for reverse order.
        """
        searcher = PyLucene.IndexSearcher(self._store)
        analyzer = PyLucene.StandardAnalyzer()
        query = PyLucene.QueryParser(default_field, analyzer).parse(query_string)

        if order_by == 'SCORE':
            sort_field = PyLucene.SortField.FIELD_SCORE
            sort = PyLucene.Sort(sort_field)
        elif order_by == 'INDEX':
            sort = PyLucene.Sort.INDEXORDER
        elif order_by == 'RELEVANCE':
            sort = PyLucene.Sort.RELEVANCE
        else:
            reverse = order_by.startswith('-')
            # lstrip instead of indexing order_by[0] in a loop: same result,
            # but no IndexError on an empty or sign-only string.
            order_by = order_by.lstrip('+-')
            sort_field = PyLucene.SortField(order_by, reverse)
            sort = PyLucene.Sort(sort_field)
        hits = searcher.search(query, sort)
        return LuceneResultSet(hits, self)
|
||||||
|
|
||||||
|
|
||||||
|
class LuceneResultSet(ResultSet):
    """Result set wrapping a PyLucene hits object; yields LuceneHit objects."""

    def __init__(self, hits, indexer):
        self._hits = hits
        self._indexer = indexer

    def __len__(self):
        return self._hits.length()

    def __iter__(self):
        for hit in self._hits:
            yield LuceneHit(hit, self._indexer)

    def __getitem__(self, item):
        # Hit.__init__ takes (data, indexer); the indexer was missing here,
        # so every indexed access raised TypeError.
        return LuceneHit(self._hits.__getitem__(item), self._indexer)
|
||||||
|
|
||||||
|
|
||||||
|
class LuceneHit(Hit):
    """A single Lucene hit; `self.data` is the underlying PyLucene hit."""

    def get_pk(self):
        """Return the stored value of the 'pk' field."""
        # FIXME: Hardcoded 'pk' field.
        return self.data.get('pk')

    def __getitem__(self, item):
        """Delegate item access to the underlying hit."""
        return self.data.__getitem__(item)

    def get_score(self):
        """Return the score Lucene assigned to this hit."""
        return self.data.getScore()

    # Read-only attribute form of get_score().
    score = property(get_score)
|
27
django/contrib/search/models.py
Normal file
27
django/contrib/search/models.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
from django.db import models
|
||||||
|
|
||||||
|
# Note: These aren't used yet, but they probably will be in the future.
|
||||||
|
# This is because the only thing that really needs to be remembered
|
||||||
|
# (the path to the index) is going to go in SETTINGS anyway.
|
||||||
|
# But persistent info such as outdated rows, search statistics, etc.
|
||||||
|
# could still be useful.
|
||||||
|
|
||||||
|
class Index(models.Model):
    """An index, recorded by the name of the model it covers (not used yet)."""
    model_name = models.CharField(maxlength=255)
|
||||||
|
|
||||||
|
class IndexedField(models.Model):
    """One field of an Index, stored as its object path (not used yet)."""
    object_path = models.CharField(maxlength=255)
    # Despite the attribute name, this refers to the Index row.
    model = models.ForeignKey('Index')
|
||||||
|
|
||||||
|
class QueryLog(models.Model):
    """This is not a full log, but merely counts queries."""
    query = models.CharField(maxlength=255, unique=True)
    query_count = models.IntegerField(default=1)
    # Only the `models` module is imported here, so the field class must be
    # qualified; a bare DateTimeField() is a NameError at import time.
    last_date = models.DateTimeField()
    last_source = models.CharField("Some identifier for who sent the query", maxlength=255)
|
||||||
|
|
||||||
|
class Person(models.Model):
    """This is for testing."""
    # Two short name columns plus a free-text column to index.
    first_name = models.CharField(maxlength=30)
    last_name = models.CharField(maxlength=30)
    description = models.TextField()
|
36
django/contrib/search/query.py
Normal file
36
django/contrib/search/query.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
class QueryParser(object):
    """Placeholder for a query language shared by all backends.

    TODO: Make a common query language for all the backends.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class ResultSet(object):
    """Abstract container of search hits.

    Backends subclass this and implement iteration, length and indexing;
    every method here raises NotImplementedError.
    """

    def __iter__(self):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def __getitem__(self, item):
        # The index argument is required by the protocol; without it
        # `result_set[0]` failed with TypeError instead of the intended
        # NotImplementedError.
        raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class Hit(object):
    """One search result, tying backend hit data to a model instance.

    `data` is the backend's own hit object and `indexer` the Indexer that
    produced it. Subclasses must implement `get_pk` and may provide a
    `score` attribute.
    """

    def __init__(self, data, indexer):
        self.indexer = indexer
        self.model = indexer.model
        self.data = data

    def get_instance(self):
        """Fetch the model instance whose primary key is `get_pk()`."""
        name = self.model._meta.pk.name
        # The backend's value is converted with the pk field's to_python.
        pk = self.model._meta.pk.to_python(self.get_pk())
        return self.model.objects.get(**{name: pk})

    # Attribute form of get_instance(); runs a query on every access.
    instance = property(get_instance)

    def get_pk(self):
        """Return the primary key stored with this hit. Backends must implement this."""
        raise NotImplementedError

    def __repr__(self):
        # `score` is only defined by some subclasses; fall back to None so
        # that repr() never raises AttributeError.
        return "<%s: %s %s, Score: %s>" % (self.__class__.__name__,
                                           self.model._meta,
                                           self.get_pk(),
                                           getattr(self, 'score', None))
|
1
django/contrib/search/views.py
Normal file
1
django/contrib/search/views.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
# Create your views here.
|
64
django/contrib/search/xapian.py
Normal file
64
django/contrib/search/xapian.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
from datetime import datetime
from itertools import imap

from django.db import models
import xapwrap.index
import xapwrap.document

from base import Indexer
from query import ResultSet, Hit
|
||||||
|
|
||||||
|
# TODO: This is incomplete.
|
||||||
|
|
||||||
|
class XapianIndexer(Indexer):
    """Indexer backend on top of Xapian via xapwrap.

    TODO: This is incomplete.
    """

    def update(self, documents=None):
        """Index `documents`, or every instance of the model if None.

        The xapwrap index at `self.path` is opened for the duration of the
        call and closed afterwards, even when indexing fails.
        """
        idx = xapwrap.index.Index(self.path, True)
        try:
            if documents is None:
                update_queue = self.model.objects.all()
            else:
                update_queue = documents

            # Iterate the resolved queue; looping over `documents` crashed
            # with the default documents=None.
            for row in update_queue:
                # Values come from the row being indexed, not from the model
                # class.
                keys = [xapwrap.document.SortKey(name, getattr(row, field.name))
                        for name, field in self.attr_fields.iteritems()]
                # `fields` was never defined in the original. Build it from
                # the unnamed text fields.
                # NOTE(review): assumes xapwrap.document.TextField accepts a
                # single text argument -- confirm against xapwrap.
                fields = [xapwrap.document.TextField(str(getattr(row, field.name)))
                          for field in self.text_fields]

                d = xapwrap.document.Document(textFields=fields, sortFields=keys, uid=row._get_pk_val())
                idx.index(d)
        finally:
            idx.close()

    def search(self, query, order_by='RELEVANCE'):
        """Search the index for `query` and return a XapianResultSet.

        `order_by` is 'RELEVANCE' or a sort key name; a leading '-' on a
        string key requests descending order.
        """
        # Only the xapwrap.index module is imported, so the class must be
        # qualified; bare `Index` is a NameError.
        idx = xapwrap.index.Index(self.path)
        if order_by == 'RELEVANCE':
            results = idx.search(query, sortByRelevence=True)
        else:
            ascending = True
            if isinstance(order_by, basestring) and order_by.startswith('-'):
                ascending = False
            while order_by[0] in '+-':
                order_by = order_by[1:]
            results = idx.search(query, order_by, sortAscending=ascending)
        # XapianResultSet takes (hits, indexer).
        return XapianResultSet(results, self)
|
||||||
|
|
||||||
|
|
||||||
|
class XapianResultSet(ResultSet):
    """Result set wrapping the results of a xapwrap search; yields XapianHit objects."""

    def __init__(self, hits, indexer):
        self._hits = hits
        self._indexer = indexer

    def __len__(self):
        return len(self._hits)

    def __iter__(self):
        # The original line had a stray ')' that made the whole module a
        # syntax error.
        for hit in self._hits:
            yield XapianHit(hit, self._indexer)
|
||||||
|
|
||||||
|
|
||||||
|
class XapianHit(Hit):
    """A single Xapian hit; `self.data` is one entry of the xapwrap results.

    Derives from Hit (not object) so that it accepts the (data, indexer)
    arguments XapianResultSet passes in and gains `instance` / `__repr__`.
    """

    def get_pk(self):
        """Return the primary key stored with the hit."""
        # NOTE(review): documents are indexed with uid=<pk>; confirm that
        # xapwrap results expose the key as 'pk' rather than 'uid'.
        return self.data['pk']

    def get_score(self):
        """Return the score of the hit."""
        return self.data['score']

    # Read-only attribute form of get_score().
    score = property(get_score)
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user