httpd-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pque...@apache.org
Subject svn commit: r729641 - /httpd/mod_mbox/trunk/scripts/site-sitemap.py
Date Sat, 27 Dec 2008 16:51:31 GMT
Author: pquerna
Date: Sat Dec 27 08:51:31 2008
New Revision: 729641

URL: http://svn.apache.org/viewvc?rev=729641&view=rev
Log:
Change the sitemap index generator to split the sitemap indexes every 500 entries, as the
great GOOG doesn't like big sitemap indexes.

Modified:
    httpd/mod_mbox/trunk/scripts/site-sitemap.py

Modified: httpd/mod_mbox/trunk/scripts/site-sitemap.py
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/scripts/site-sitemap.py?rev=729641&r1=729640&r2=729641&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/scripts/site-sitemap.py (original)
+++ httpd/mod_mbox/trunk/scripts/site-sitemap.py Sat Dec 27 08:51:31 2008
@@ -17,6 +17,7 @@
     out = get_output(cmd).split()
     return int(out[0]) * 1024
 
+BASEPATH=sys.argv[1]
 ROOT="/x1/mail-archives/mod_mbox"
 HOSTNAME="http://mail-archives.apache.org/mod_mbox/"
 PARITION_SIZE=100 * 1024 * 1024
@@ -37,15 +38,16 @@
 keys = tlps.keys()
 keys.sort()
 
-print """<?xml version="1.0" encoding="UTF-8"?>
-<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
-"""
-# TODO: Insert a 'static' file sitemap.
-"""
-   <sitemap>
-      <loc>/sitemap-static.xml</loc>
-   </sitemap>
-"""
+count = 0
+fcount = 0
+def write_sitemap_header(fp):
+    fp.write("""<?xml version="1.0" encoding="UTF-8"?>\n<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n""")
+def write_sitemap_footer(fp):
+    fp.write("</sitemapindex>\n")
+
+fp = open(BASEPATH % (fcount), 'w')
+
+write_sitemap_header(fp)
 
 for tlp in keys:
     klist = tlps[tlp].keys()
@@ -54,13 +56,21 @@
         name = tlps[tlp][list][0]
         size = tlps[tlp][list][1]
         if size < PARITION_SIZE:
-            print "   <sitemap><loc>%s%s/?format=sitemap</loc></sitemap>" % (HOSTNAME, name)
+            count += 1
+            fp.write("<sitemap><loc>%s%s/?format=sitemap</loc></sitemap>\n" % (HOSTNAME, name))
         else:
             part = (size / PARITION_SIZE) + 1
             for i in range(0, part):
-                print "   <sitemap><loc>%s%s/?format=sitemap&amp;pmax=%d&amp;part=%d</loc></sitemap>" % (HOSTNAME, name, part, i)
+                count += 1
+                fp.write("<sitemap><loc>%s%s/?format=sitemap&amp;pmax=%d&amp;part=%d</loc></sitemap>\n" % (HOSTNAME, name, part, i))
+        if count > 500:
+            write_sitemap_footer(fp)
+            fp.close()
+            count = 0
+            fcount += 1
+            fp = open(BASEPATH  % (fcount), 'w')
+            write_sitemap_header(fp)
+
+write_sitemap_footer(fp)
 
-print """
-</sitemapindex>
-"""
 



Mime
View raw message