summary | refs | log | tree | commit | diff
path: root/contrib
diff options
context:
space:
mode:
author: Mathias Preiner <mathias.preiner@gmail.com> 2019-03-25 18:19:45 -0700
committer: GitHub <noreply@github.com> 2019-03-25 18:19:45 -0700
commit: ad1dd82d553957040b9126a85592e50d2ebbf3a8 (patch)
tree: 5541adfe2ecc960d5d469171499eb9ca863e3db0 /contrib
parent: a3dccac861e05e91d139a8c6da3b1605a068ff00 (diff)
get-authors: Exclude common source code patterns. (#2900)
Exclude lines that #include header files and define namespaces. Since we use git blame -C -M to determine the current top contributors, git tries to match all #include and namespace definitions to an original author, which is not accurate since these lines are usually not copied over from other files.
Diffstat (limited to 'contrib')
-rwxr-xr-x contrib/get-authors 63
1 files changed, 42 insertions, 21 deletions
diff --git a/contrib/get-authors b/contrib/get-authors
index 6ee3166e2..d2bd1e7f5 100755
--- a/contrib/get-authors
+++ b/contrib/get-authors
@@ -1,37 +1,57 @@
#!/bin/sh
#
# get-authors
-# Copyright (c) 2009-2018 The CVC4 Project
+# Copyright (c) 2009-2019 The CVC4 Project
#
# usage: get-authors [ files... ]
#
-# This script uses git to get the original author
+# This script uses git blame -w -M -C to get the original author
#
gituser="`git config user.name` <`git config user.email`>"
-if [ "$1" = "--email" ]; then
- strip_email=cat
- shift
-else
- strip_email="sed 's, *<[^>]*@[^>]*>,,g'"
-fi
-
while [ $# -gt 0 ]; do
f=$1
shift
- contributors=
- if [ -z "`grep " \*\* Top contributors" $f`" ]
+ if ! grep -q " \*\* Top contributors" "$f"
then
header_lines=0
else
- header_lines=`grep "\*\*\/" $f -m 1 -n | cut -d ':' -f 1`
+ header_lines=$(grep "\*\*\/" "$f" -m 1 -n | cut -d ':' -f 1)
if [ -z $header_lines ]; then header_lines=0; fi
fi
((header_lines++))
- total_lines=`wc -l "$f" | awk '{print$1}'`
- git blame -w -M -C --incremental -L $header_lines,$total_lines "$f" | \
- gawk '/^[0-9a-f]+ [0-9]+ [0-9]+ [0-9]+$/ {nl=$4;} /^summary .*copyright/ {nl=0} /^author / {$1=""; author=$0;} /^author-mail / {mail=$2} /^filename / {while(nl--) {print author,mail}}' | \
+ total_lines=$(wc -l "$f" | awk '{print$1}')
+
+ # Note: Instead of using the porcelain format, we extract the author name
+ # information from the human-readable format since it prints the source code
+ # and we want to exclude specific lines of code.
+
+ # Each line looks as follows:
+ #
+ # sha1 filename (Author Name 2019-03-25 13:36:07 -0800 42) code ...
+
+ git blame -w -M -C -L $header_lines,$total_lines "$f" | \
+
+ # Discard everything to the left of the first '('
+ awk -F '(' '{print $2}' | \
+
+ # Discard the source code to the right of the first ')' and omit lines
+ # whose code begins with:
+ # (1) #include
+ # (2) namespace
+ # (3) } ... namespace ...
+ #
+ awk -F ')' \
+ '$2 !~ /^[ \t]*(#include|namespace|}.*namespace.*)/ {print $1}' | \
+
+ # Keep author names only, remove the last 4 columns in ( ... )
+ awk 'NF{NF-=4};1' | \
+
+ # Determine top three contributors
+ sort | uniq -c | sort -rn | head -n3 | \
+
+ # Fix author names
sed "s,Not Committed Yet <not.committed.yet>,$gituser," | \
sed 's/PaulMeng/Paul Meng/' | \
sed 's/barrettcw/Clark Barrett/' | \
@@ -45,10 +65,11 @@ while [ $# -gt 0 ]; do
sed 's/Martin/Martin Brain/' | \
sed 's/justinxu421/Justin Xu/' | \
sed 's/yoni206/Yoni Zohar/' | \
- eval "$strip_email" | \
- sort | uniq -c | sort -nr | head -n 3 | \
- ( while read lines author; do
- contributors="${contributors:+$contributors, }$author"
- done; \
- echo "$contributors")
+
+ # Remove the first column added by uniq -c (number of lines)
+ awk '{$1=""; print}' | \
+
+ # Comma-separated list of author names; remove leading whitespace and
+ # the trailing comma
+ tr '\n' ', ' | sed 's/^[ \t]*//' | sed 's/,$/\n/'
done
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback