diff options
author | Mathias Preiner <mathias.preiner@gmail.com> | 2019-03-25 18:19:45 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-25 18:19:45 -0700 |
commit | ad1dd82d553957040b9126a85592e50d2ebbf3a8 (patch) | |
tree | 5541adfe2ecc960d5d469171499eb9ca863e3db0 /contrib | |
parent | a3dccac861e05e91d139a8c6da3b1605a068ff00 (diff) |
get-authors: Exclude common source code patterns. (#2900)
Exclude lines that #include header files or define namespaces.
Because we use git blame -C -M to determine the current top contributors,
git tries to match every #include and namespace definition to an original
author in some other file. That attribution is not accurate, since these
lines are usually written anew rather than copied over from other files.
Diffstat (limited to 'contrib')
-rwxr-xr-x | contrib/get-authors | 63 |
1 files changed, 42 insertions, 21 deletions
diff --git a/contrib/get-authors b/contrib/get-authors index 6ee3166e2..d2bd1e7f5 100755 --- a/contrib/get-authors +++ b/contrib/get-authors @@ -1,37 +1,57 @@ #!/bin/sh # # get-authors -# Copyright (c) 2009-2018 The CVC4 Project +# Copyright (c) 2009-2019 The CVC4 Project # # usage: get-authors [ files... ] # -# This script uses git to get the original author +# This script uses git blame -w -N -C to get the original author # gituser="`git config user.name` <`git config user.email`>" -if [ "$1" = "--email" ]; then - strip_email=cat - shift -else - strip_email="sed 's, *<[^>]*@[^>]*>,,g'" -fi - while [ $# -gt 0 ]; do f=$1 shift - contributors= - if [ -z "`grep " \*\* Top contributors" $f`" ] + if ! grep -q " \*\* Top contributors" "$f" then header_lines=0 else - header_lines=`grep "\*\*\/" $f -m 1 -n | cut -d ':' -f 1` + header_lines=$(grep "\*\*\/" "$f" -m 1 -n | cut -d ':' -f 1) if [ -z $header_lines ]; then header_lines=0; fi fi ((header_lines++)) - total_lines=`wc -l "$f" | awk '{print$1}'` - git blame -w -M -C --incremental -L $header_lines,$total_lines "$f" | \ - gawk '/^[0-9a-f]+ [0-9]+ [0-9]+ [0-9]+$/ {nl=$4;} /^summary .*copyright/ {nl=0} /^author / {$1=""; author=$0;} /^author-mail / {mail=$2} /^filename / {while(nl--) {print author,mail}}' | \ + total_lines=$(wc -l "$f" | awk '{print$1}') + + # Note: Instead of using the porcelain format, we extract the author name + # information from the humand readable format since it prints the source code + # and we want to exclude specific lines of code. + + # Each line looks a follows: + # + # sha1 filename (Author Name 2019-03-25 13:36:07 -0800 42) code ... + + git blame -w -M -C -L $header_lines,$total_lines "$f" | \ + + # Discard everthing left to first '(' + awk -F '(' '{print $2}' | \ + + # Discard the source code left to first ')' and omit lines that begin + # with: + # (1) #include + # (2) namespace + # (3) } ... namespace ... 
+ # + awk -F ')' \ + '$2 !~ /^[ \t]*(#include|namespace|}.*namespace.*)/ {print $1}' | \ + + # Keep author names only, remove the last 4 columns in ( ... ) + awk 'NF{NF-=4};1' | \ + + # Determine top three contributors + sort | uniq -c | sort -rn | head -n3 | \ + + # Fix author names sed "s,Not Committed Yet <not.committed.yet>,$gituser," | \ sed 's/PaulMeng/Paul Meng/' | \ sed 's/barrettcw/Clark Barrett/' | \ @@ -45,10 +65,11 @@ while [ $# -gt 0 ]; do sed 's/Martin/Martin Brain/' | \ sed 's/justinxu421/Justin Xu/' | \ sed 's/yoni206/Yoni Zohar/' | \ - eval "$strip_email" | \ - sort | uniq -c | sort -nr | head -n 3 | \ - ( while read lines author; do - contributors="${contributors:+$contributors, }$author" - done; \ - echo "$contributors") + + # Remove first columns from uniq -c (number of lines) + awk '{$1=""; print}' | \ + + # Comma separated list of author names, remove leading whitespaces, and + # remove trailing comma + tr '\n' ', ' | sed 's/^[ \t]*//' | sed 's/,$/\n/' done |