Mailing-List: contact commits-help@spark.apache.org; run by ezmlm
Precedence: bulk
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: andrewor14@apache.org
To: commits@spark.apache.org
Message-Id: <b11a5948cd97432f82a0ad9124766dde@git.apache.org>
Subject: spark git commit: [Release] Update contributors list format and sort
 it
Date: Wed, 17 Dec 2014 06:16:04 +0000 (UTC)

Repository: spark
Updated Branches:
  refs/heads/branch-1.1 991748d8b -> 0efd691d9


[Release] Update contributors list format and sort it

Additionally, we now warn the user when a duplicate author name
arises, in which case the user needs to merge the duplicate entries manually.

Conflicts:
	.rat-excludes


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0efd691d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0efd691d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0efd691d

Branch: refs/heads/branch-1.1
Commit: 0efd691d983e6a1fa8b3a95ed1c03408febf679d
Parents: 991748d
Author: Andrew Or <andrew@databricks.com>
Authored: Tue Dec 16 22:11:03 2014 -0800
Committer: Andrew Or <andrew@databricks.com>
Committed: Tue Dec 16 22:15:56 2014 -0800

----------------------------------------------------------------------
 .gitignore                                   |  2 +-
 .rat-excludes                                |  2 ++
 dev/create-release/generate-contributors.py  |  8 +++---
 dev/create-release/translate-contributors.py | 34 ++++++++++++++++-------
 4 files changed, 31 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0efd691d/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index ed79250..bb3209e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,7 +47,7 @@ checkpoint
 derby.log
 dist/
 dev/create-release/*txt
-dev/create-release/*new
+dev/create-release/*final
 spark-*-bin-*.tgz
 unit-tests.log
 /lib/

http://git-wip-us.apache.org/repos/asf/spark/blob/0efd691d/.rat-excludes
----------------------------------------------------------------------
diff --git a/.rat-excludes b/.rat-excludes
index eaefef1..32f2c4b 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -57,3 +57,5 @@ dist/*
 .*iws
 logs
 .*scalastyle-output.xml
+.*dependency-reduced-pom.xml
+dev/create-release/known_translations

http://git-wip-us.apache.org/repos/asf/spark/blob/0efd691d/dev/create-release/generate-contributors.py
----------------------------------------------------------------------
diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py
index e65c5d8..8aaa250 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -192,9 +192,9 @@ for commit in filtered_commits:
 print "==================================================================================\n"
 
 # Write to contributors file ordered by author names
-# Each line takes the format "Author name - semi-colon delimited contributions"
-# e.g. Andrew Or - Bug fixes in Windows, Core, and Web UI; improvements in Core
-# e.g. Tathagata Das - Bug fixes and new features in Streaming
+# Each line takes the format " * Author name -- semi-colon delimited contributions"
+# e.g. * Andrew Or -- Bug fixes in Windows, Core, and Web UI; improvements in Core
+# e.g. * Tathagata Das -- Bug fixes and new features in Streaming
 contributors_file = open(contributors_file_name, "w")
 authors = author_info.keys()
 authors.sort()
@@ -223,7 +223,7 @@ for author in authors:
     # E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
     if author in invalid_authors and invalid_authors[author]:
         author = author + "/" + "/".join(invalid_authors[author])
-    line = "%s - %s" % (author, contribution)
+    line = " * %s -- %s" % (author, contribution)
     contributors_file.write(line + "\n")
 contributors_file.close()
 print "Contributors list is successfully written to %s!" % contributors_file_name

http://git-wip-us.apache.org/repos/asf/spark/blob/0efd691d/dev/create-release/translate-contributors.py
----------------------------------------------------------------------
diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py
index f3b1efd..86fa02d 100755
--- a/dev/create-release/translate-contributors.py
+++ b/dev/create-release/translate-contributors.py
@@ -43,14 +43,12 @@ if not JIRA_USERNAME or not JIRA_PASSWORD:
 if not GITHUB_API_TOKEN:
     sys.exit("GITHUB_API_TOKEN must be set")
 
-# Write new contributors list to <old_file_name>.new
+# Write new contributors list to <old_file_name>.final
 if not os.path.isfile(contributors_file_name):
     print "Contributors file %s does not exist!" % contributors_file_name
     print "Have you run ./generate-contributors.py yet?"
     sys.exit(1)
 contributors_file = open(contributors_file_name, "r")
-new_contributors_file_name = contributors_file_name + ".new"
-new_contributors_file = open(new_contributors_file_name, "w")
 warnings = []
 
 # In non-interactive mode, this script will choose the first replacement that is valid
@@ -73,7 +71,7 @@ known_translations_file_name = "known_translations"
 known_translations_file = open(known_translations_file_name, "r")
 for line in known_translations_file:
     if line.startswith("#"): continue
-    [old_name, new_name] = line.split(" - ")
+    [old_name, new_name] = line.strip("\n").split(" - ")
     known_translations[old_name] = new_name
 known_translations_file.close()
 
@@ -147,16 +145,16 @@ def generate_candidates(author, issues):
 # If no such name exists, the original name is used (without the JIRA numbers).
 print "\n========================== Translating contributor list =========================="
 lines = contributors_file.readlines()
+contributions = []
 for i, line in enumerate(lines):
-    temp_author = line.split(" - ")[0]
+    temp_author = line.strip(" * ").split(" -- ")[0]
     print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines))
     if not temp_author:
-        error_msg = "    ERROR: Expected the following format <author> - <contributions>\n"
+        error_msg = "    ERROR: Expected the following format \" * <author> -- <contributions>\"\n"
         error_msg += "    ERROR: Actual = %s" % line
         print error_msg
         warnings.append(error_msg)
-        new_contributors_file.write(line)
-        new_contributors_file.flush()
+        contributions.append(line)
         continue
     author = temp_author.split("/")[0]
     # Use the local copy of known translations where possible
@@ -222,10 +220,26 @@ for i, line in enumerate(lines):
             known_translations_file.write("%s - %s\n" % (author, new_author))
             known_translations_file.flush()
         line = line.replace(temp_author, author)
-    new_contributors_file.write(line)
-    new_contributors_file.flush()
+    contributions.append(line)
 print "==================================================================================\n"
 contributors_file.close()
+known_translations_file.close()
+
+# Sort the contributions before writing them to the new file.
+# Additionally, check if there are any duplicate author rows.
+# This could happen if the same user has both a valid full
+# name (e.g. Andrew Or) and an invalid one (andrewor14).
+# If so, warn the user about this at the end.
+contributions.sort()
+all_authors = set()
+new_contributors_file_name = contributors_file_name + ".final"
+new_contributors_file = open(new_contributors_file_name, "w")
+for line in contributions:
+    author = line.strip(" * ").split(" -- ")[0]
+    if author in all_authors:
+        warnings.append("Detected duplicate author name %s. Please merge these manually." % author)
+    all_authors.add(author)
+    new_contributors_file.write(line)
 new_contributors_file.close()
 
 print "Translated contributors list successfully written to %s!" % new_contributors_file_name


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org