mirror of
				https://github.com/django/django.git
				synced 2025-10-31 09:41:08 +00:00 
			
		
		
		
	Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using
pagination). Patch from Julian Bez. The docs patch here could probably do with some rewording. git-svn-id: http://code.djangoproject.com/svn/django/trunk@8088 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
		
							
								
								
									
										3
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								AUTHORS
									
									
									
									
									
								
							@@ -71,7 +71,7 @@ answer newbie questions, and generally made Django that much better:
 | 
				
			|||||||
    Esdras Beleza <linux@esdrasbeleza.com>
 | 
					    Esdras Beleza <linux@esdrasbeleza.com>
 | 
				
			||||||
    Chris Bennett <chrisrbennett@yahoo.com>
 | 
					    Chris Bennett <chrisrbennett@yahoo.com>
 | 
				
			||||||
    James Bennett
 | 
					    James Bennett
 | 
				
			||||||
    Ben Godfrey <http://aftnn.org>
 | 
					    Julian Bez
 | 
				
			||||||
    Arvis Bickovskis <viestards.lists@gmail.com>
 | 
					    Arvis Bickovskis <viestards.lists@gmail.com>
 | 
				
			||||||
    Paul Bissex <http://e-scribe.com/>
 | 
					    Paul Bissex <http://e-scribe.com/>
 | 
				
			||||||
    Simon Blanchard
 | 
					    Simon Blanchard
 | 
				
			||||||
@@ -166,6 +166,7 @@ answer newbie questions, and generally made Django that much better:
 | 
				
			|||||||
    glin@seznam.cz
 | 
					    glin@seznam.cz
 | 
				
			||||||
    martin.glueck@gmail.com
 | 
					    martin.glueck@gmail.com
 | 
				
			||||||
    Artyom Gnilov <boobsd@gmail.com>
 | 
					    Artyom Gnilov <boobsd@gmail.com>
 | 
				
			||||||
 | 
					    Ben Godfrey <http://aftnn.org>
 | 
				
			||||||
    GomoX <gomo@datafull.com>
 | 
					    GomoX <gomo@datafull.com>
 | 
				
			||||||
    Guilherme Mesquita Gondim <semente@taurinus.org>
 | 
					    Guilherme Mesquita Gondim <semente@taurinus.org>
 | 
				
			||||||
    Mario Gonzalez <gonzalemario@gmail.com>
 | 
					    Mario Gonzalez <gonzalemario@gmail.com>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
from django.core import urlresolvers
 | 
					from django.core import urlresolvers, paginator
 | 
				
			||||||
import urllib
 | 
					import urllib
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PING_URL = "http://www.google.com/webmasters/tools/ping"
 | 
					PING_URL = "http://www.google.com/webmasters/tools/ping"
 | 
				
			||||||
@@ -34,6 +34,10 @@ def ping_google(sitemap_url=None, ping_url=PING_URL):
 | 
				
			|||||||
    urllib.urlopen("%s?%s" % (ping_url, params))
 | 
					    urllib.urlopen("%s?%s" % (ping_url, params))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Sitemap:
 | 
					class Sitemap:
 | 
				
			||||||
 | 
					    # This limit is defined by the Sitemaps protocol. See the index documentation at
 | 
				
			||||||
 | 
					    # http://sitemaps.org/protocol.php#index.
 | 
				
			||||||
 | 
					    limit = 50000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __get(self, name, obj, default=None):
 | 
					    def __get(self, name, obj, default=None):
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            attr = getattr(self, name)
 | 
					            attr = getattr(self, name)
 | 
				
			||||||
@@ -49,11 +53,17 @@ class Sitemap:
 | 
				
			|||||||
    def location(self, obj):
 | 
					    def location(self, obj):
 | 
				
			||||||
        return obj.get_absolute_url()
 | 
					        return obj.get_absolute_url()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_urls(self):
 | 
					    def _get_paginator(self):
 | 
				
			||||||
 | 
					        if not hasattr(self, "paginator"):
 | 
				
			||||||
 | 
					            self.paginator = paginator.Paginator(self.items(), self.limit)
 | 
				
			||||||
 | 
					        return self.paginator
 | 
				
			||||||
 | 
					    paginator = property(_get_paginator)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_urls(self, page=1):
 | 
				
			||||||
        from django.contrib.sites.models import Site
 | 
					        from django.contrib.sites.models import Site
 | 
				
			||||||
        current_site = Site.objects.get_current()
 | 
					        current_site = Site.objects.get_current()
 | 
				
			||||||
        urls = []
 | 
					        urls = []
 | 
				
			||||||
        for item in self.items():
 | 
					        for item in self.paginator.page(page).object_list:
 | 
				
			||||||
            loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
 | 
					            loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
 | 
				
			||||||
            url_info = {
 | 
					            url_info = {
 | 
				
			||||||
                'location':   loc,
 | 
					                'location':   loc,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,14 +3,22 @@ from django.template import loader
 | 
				
			|||||||
from django.contrib.sites.models import Site
 | 
					from django.contrib.sites.models import Site
 | 
				
			||||||
from django.core import urlresolvers
 | 
					from django.core import urlresolvers
 | 
				
			||||||
from django.utils.encoding import smart_str
 | 
					from django.utils.encoding import smart_str
 | 
				
			||||||
 | 
					from django.core.paginator import EmptyPage, PageNotAnInteger
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def index(request, sitemaps):
 | 
					def index(request, sitemaps):
 | 
				
			||||||
    current_site = Site.objects.get_current()
 | 
					    current_site = Site.objects.get_current()
 | 
				
			||||||
    sites = []
 | 
					    sites = []
 | 
				
			||||||
    protocol = request.is_secure() and 'https' or 'http'
 | 
					    protocol = request.is_secure() and 'https' or 'http'
 | 
				
			||||||
    for section in sitemaps.keys():
 | 
					    for section, site in sitemaps.items():
 | 
				
			||||||
 | 
					        if callable(site):
 | 
				
			||||||
 | 
					            pages = site().paginator.num_pages
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            pages = site.paginator.num_pages
 | 
				
			||||||
        sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
 | 
					        sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
 | 
				
			||||||
        sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
 | 
					        sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
 | 
				
			||||||
 | 
					        if pages > 1:
 | 
				
			||||||
 | 
					            for page in range(2, pages+1):
 | 
				
			||||||
 | 
					                sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page))
 | 
				
			||||||
    xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
 | 
					    xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
 | 
				
			||||||
    return HttpResponse(xml, mimetype='application/xml')
 | 
					    return HttpResponse(xml, mimetype='application/xml')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -22,10 +30,16 @@ def sitemap(request, sitemaps, section=None):
 | 
				
			|||||||
        maps.append(sitemaps[section])
 | 
					        maps.append(sitemaps[section])
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        maps = sitemaps.values()
 | 
					        maps = sitemaps.values()
 | 
				
			||||||
 | 
					    page = request.GET.get("p", 1)
 | 
				
			||||||
    for site in maps:
 | 
					    for site in maps:
 | 
				
			||||||
        if callable(site):
 | 
					        try:
 | 
				
			||||||
            urls.extend(site().get_urls())
 | 
					            if callable(site):
 | 
				
			||||||
        else:
 | 
					                urls.extend(site().get_urls(page))
 | 
				
			||||||
            urls.extend(site.get_urls())
 | 
					            else:
 | 
				
			||||||
 | 
					                urls.extend(site.get_urls(page))
 | 
				
			||||||
 | 
					        except EmptyPage:
 | 
				
			||||||
 | 
					            raise Http404("Page %s empty" % page)
 | 
				
			||||||
 | 
					        except PageNotAnInteger:
 | 
				
			||||||
 | 
					            raise Http404("No page '%s'" % page)
 | 
				
			||||||
    xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
 | 
					    xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
 | 
				
			||||||
    return HttpResponse(xml, mimetype='application/xml')
 | 
					    return HttpResponse(xml, mimetype='application/xml')
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -282,6 +282,10 @@ This will automatically generate a ``sitemap.xml`` file that references
 | 
				
			|||||||
both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
 | 
					both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
 | 
				
			||||||
classes and the ``sitemaps`` dict don't change at all.
 | 
					classes and the ``sitemaps`` dict don't change at all.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If one of your sitemaps is going to have more than 50,000 URLs, you should 
 | 
				
			||||||
 | 
					create an index file. Your sitemap will then be paginated, and the index will 
 | 
				
			||||||
 | 
					reflect that.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Pinging Google
 | 
					Pinging Google
 | 
				
			||||||
==============
 | 
					==============
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user