"""Parse a LazyStructure to extract abstractions for the UI."""
from collections import defaultdict
from lazy_structure import LazyGeneratedTextDocument

def parse_lazy_structure(structure):
    """Returns a dict representation of a LazyStructure.

    The returned dictionary has the form
    {
        "documents": [
            {
                "text": str,
                "chunks": [(global_id, start, length)],
                "generated": bool,
            }
        ],
        "maps": [
            [chunk_1_gid, chunk_2_gid, ...]
        ],
    }

    maps[i] lists the global IDs of all chunks mapped to the ith abstract
    node.
    """
    ts = structure.ts
    parsed = {"documents": [], "maps": []}
    # (1) Add the documents.
    all_chunk_nodes = set()
    for document in structure.documents:
        parsed_doc = {
            "text": document.text,
            "chunks": [],
            "generated": isinstance(document, LazyGeneratedTextDocument),
        }
        for chunk in document.chunks:
            chunk_node = structure.NodeOfChunk(document, chunk)
            if ts.has_node(chunk_node):
                start, length = chunk
                parsed_doc["chunks"].append((chunk_node, start, length))
                all_chunk_nodes.add(chunk_node)
        # Add fake chunks for the rest of the document.
        parsed_doc["chunks"] = pad_chunks(parsed_doc["chunks"], document)
        parsed["documents"].append(parsed_doc)
    # (2) Add the maps.
    abstract_nodes = defaultdict(set)
    for fact in ts.lookup(None, None, "/:Mapper:Abstraction"):
        for other_fact in ts.lookup(fact[1], None, None):
            if other_fact[1] in all_chunk_nodes:
                abstract_nodes[other_fact[2]].add(other_fact[1])
    parsed["maps"] = sorted(map(sorted, abstract_nodes.values()))
    return parsed
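
# Sketch (an assumption, not part of the original module): one way a UI might
# consume a parsed document is to turn its (global_id, start, length) chunk
# triples back into text spans. The helper name iter_chunk_spans is
# hypothetical.
def iter_chunk_spans(parsed_doc):
    """Yields (global_id, text_slice) pairs, including the fake gap chunks."""
    for global_id, start, length in parsed_doc["chunks"]:
        yield global_id, parsed_doc["text"][start:start + length]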

def pad_chunks(chunks, document):
    """Fills in gaps in @chunks.

    For example, @chunks may only contain the chunks of the document which
    have actually been added to the structure. This iterates over @chunks
    and, wherever a gap is found, inserts a new chunk with the node name set
    to False.

    @chunks should be a list of triples (node_name, start, length), sorted by
    start. The return value is of the same format.
    """
    # Sentinels at offset 0 and at the end of the document let the loop below
    # detect gaps before the first chunk and after the last one.
    padded = [(False, 0, 0)]
    chunks = chunks + [(False, len(document.text), 0)]
    for (global_id, start, length) in chunks:
        last_end = padded[-1][1] + padded[-1][2]
        if last_end < start:
            # Gap between the previous chunk and this one: insert a fake chunk.
            padded.append((False, last_end, start - last_end))
        padded.append((global_id, start, length))
    # Drop the two sentinels.
    return padded[1:-1]
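
if __name__ == "__main__":
    # Minimal sketch of pad_chunks on its own (an illustration, not original
    # code). pad_chunks only reads document.text, so a stand-in object is
    # enough; the text and the node ids 7 and 9 are made up.
    from types import SimpleNamespace

    demo_doc = SimpleNamespace(text="hello world")
    # The real chunks cover "hello" and "world"; the space between them comes
    # back as a fake gap chunk with node name False.
    print(pad_chunks([(7, 0, 5), (9, 6, 5)], demo_doc))
    # -> [(7, 0, 5), (False, 5, 1), (9, 6, 5)]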