summaryrefslogtreecommitdiff
path: root/dblpify.py
blob: c33d82b61f72fdf033003ccb7b381cd3d86d9e4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""Helper script to automatically standardize a BibTeX database.
"""
import os
import sys
import urllib
import bibtexparser
import dblp

def main(bib_file):
    """Standardize the file given by @bib_file relative to working directory.

    Every entry of the input database is searched for on DBLP by title and
    the user is asked to pick the matching row. The BibTeX record of the
    chosen row is downloaded from DBLP, given the entry's original identifier
    and written to [name].dblp.bib. Entries without a title, without any
    search result, or for which the user selects no row are recorded in the
    output as a "% COULD NOT FIND" line instead.

    The working directory is read from the BUILD_WORKING_DIRECTORY environment
    variable; a KeyError is raised if it is not set.
    """
    # A plain `import urllib` does not load the `request` submodule, so import
    # it explicitly where it is used.
    import urllib.request

    bib_file = os.path.join(os.environ["BUILD_WORKING_DIRECTORY"], bib_file)
    with open(bib_file, "r") as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    # [name].bib -> [name].dblp.bib
    out_path = os.path.splitext(bib_file)[0] + ".dblp.bib"
    entries = bib_database.entries

    # The context manager guarantees the partial output is flushed and closed
    # even if a lookup, the prompt or the download raises midway.
    with open(out_path, "w") as out_file:
        for i, entry in enumerate(entries):
            title = entry.get("title", "[None]")
            author = entry.get("author", "[None]")
            print("Entry:", i + 1, "/", len(entries))
            print("Title:", title)
            print("Author:", author)
            bibtex_id = entry["ID"]

            # Without a title there is nothing to search for.
            result = None
            if "title" in entry:
                results = search_dblp(entry["title"])
                if not results.empty:
                    result = select_row(results)

            if result is None:
                out_file.write("\n% COULD NOT FIND " + bibtex_id
                               + ": " + title
                               + " by " + author + "} \n")
                continue

            bibtex_url = (
                f"https://dblp.uni-trier.de/rec/{result.Id}.bib?param=1")
            with urllib.request.urlopen(bibtex_url) as response:
                bibtex_entry = response.read().decode("utf-8")
            bibtex_entry = set_id(bibtex_entry, bibtex_id)
            print(bibtex_entry)
            out_file.write(bibtex_entry)

def search_dblp(title):
    """Given a paper title, attempt to search for it on DBLP.

    If @title does not match anything, we iteratively loosen our search
    constraints by dropping the last word of the title until results are found.

    Returns the result of the first search that is not empty. If even the
    first word alone matches nothing, the (empty) result of that last search
    is returned, so callers must still check `.empty`.
    """
    # Braces are BibTeX markup, not part of the searchable title.
    words = title.replace("{", "").replace("}", "").split(" ")
    while True:
        results = dblp.search([" ".join(words)])
        # Stop at the last remaining word: shrinking further would only
        # repeat the same empty query forever.
        if not results.empty or len(words) <= 1:
            return results
        words.pop()

def set_id(bibtex_entry, ID):
    """Given a (string) BibTeX entry, replace its identifier with @ID.

    The identifier is the text between the first "{" and the first ","
    following it. Everything before the brace and from the comma onwards is
    kept unchanged. A ValueError is raised if @bibtex_entry has no "{" or no
    "," after it.
    """
    first_bracket = bibtex_entry.index("{") + 1
    # Look for the comma only after the opening brace so that a comma in any
    # text preceding the entry cannot be mistaken for the end of the key.
    first_comma = bibtex_entry.index(",", first_bracket)
    return bibtex_entry[:first_bracket] + ID + bibtex_entry[first_comma:]

def select_row(results):
    """Interactively pick one row out of a DataFrame of DBLP entries.

    The Type/Title/Authors/Where columns are printed and the user is asked
    for a row position. The table and prompt are repeated until the answer is
    either a valid position, in which case that row is returned, or -1, in
    which case None is returned.
    """
    while True:
        print(results[["Type", "Title", "Authors", "Where"]])
        try:
            choice = int(input("Select a row (or -1 for none): "))
        except ValueError:
            # Not a number at all: show the table and ask again.
            continue
        if choice == -1:
            return None
        if 0 <= choice < len(results):
            return results.iloc[choice]
        # Any other number is out of range: ask again.

if __name__ == "__main__":
    # The only argument is the path of the .bib file to standardize, which
    # main() resolves against BUILD_WORKING_DIRECTORY.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} FILE.bib")
    main(sys.argv[1])
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback