Diffstat (limited to 'examples/program_analysis/analyzelib.py')
-rw-r--r--  examples/program_analysis/analyzelib.py  244
1 files changed, 244 insertions, 0 deletions
diff --git a/examples/program_analysis/analyzelib.py b/examples/program_analysis/analyzelib.py
new file mode 100644
index 0000000..b7177f3
--- /dev/null
+++ b/examples/program_analysis/analyzelib.py
@@ -0,0 +1,244 @@
+"""Helper methods for analyzing program source code."""
+from collections import defaultdict
+from tactic_utils import ApplyRulesMatching, SearchRules, GetMatcher, Fix
+from mapper import MapperCodelet
+from lazy_structure import LazyTextDocument, SPECIAL_CHARACTERS
+from ts_utils import RegisterRule, AssertNodesEqual
+from analogy_utils import Analogy
+
+def LoadDocument(path, extra_chunks=None, extra_special=None):
+ """Load a document from the file system into a LazyTextDocument.
+
+    @extra_chunks: list of (start, length) line ranges in the document whose
+        lines should be joined into a single chunk.
+    @extra_special: list of additional special tokens that should each be
+        treated as a chunk.
+ """
+ extra_special = extra_special or []
+ with open(path, "r") as in_file:
+ text = in_file.readlines()
+ chunks = []
+ for start, length in (extra_chunks or []):
+ chunks.append("".join(text[start:(start + length)]).lstrip(" "))
+ text = "".join(text)
+ return LazyTextDocument(text, chunks + extra_special + SPECIAL_CHARACTERS)
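+# Example usage of LoadDocument (illustrative only; the path, line range, and
+# extra token below are hypothetical):
+#
+#   doc = LoadDocument("examples/program_analysis/before.py",
+#                      extra_chunks=[(10, 3)],  # 3 lines starting at index 10 form one chunk
+#                      extra_special=["->"])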
+
+def CompleteAnalogyTactic(structure, sources):
+ """Heuristic for completing an analogy involving code transformations.
+
+    @sources should be a list of (before, after) pairs of LazyTextDocuments.
+    @sources[0][0] and @sources[1][0] should be the before-code for
+    @sources[0][1] and @sources[1][1] respectively.
+    @sources[2][0] should be the prompt's before-code, and @sources[2][1]
+    should be a currently-empty LazyTextDocument to be generated by Sifter.
+ """
+ structure.ts.commit()
+ print("Identifying word pairs...")
+ ApplyRulesMatching(structure.rt, "SameWord", dict())
+
+ print("Mapping the examples...")
+ analogy = Analogy.Begin(structure.rt, dict({
+ "/:Mapper:NoSlipRules:A":
+ structure.NodeOfChunk(sources[0][0], sources[0][0].chunks[0]),
+ "/:Mapper:NoSlipRules:B":
+ structure.NodeOfChunk(sources[1][0], sources[1][0].chunks[0]),
+ "/:Mapper:NoSlipRules:C": "/:Chunk",
+ }), extend_here=["/:Document"])
+ ExtendAnalogyTactic(structure, analogy)
+
+ print("Mapping the prompt...")
+ analogy = Analogy.Begin(structure.rt, dict({
+ "/:Mapper:NoSlipRules:A":
+ structure.NodeOfChunk(sources[0][0], sources[0][0].chunks[0]),
+ "/:Mapper:NoSlipRules:B":
+ structure.NodeOfChunk(sources[2][0], sources[2][0].chunks[0]),
+ "/:Mapper:NoSlipRules:C": "/:Chunk",
+ }), exists=True, extend_here=["/:Document"])
+ ExtendAnalogyTactic(structure, analogy)
+
+ print("Solving the prompt...")
+ analogy.state = "concretize"
+ Fix(analogy.ExtendMap, ["/:Document"])
+ Fix(analogy.ExtendMap)
+ Fix(analogy.ExtendMap, no_follow=True)
+ Fix(ApplyRulesMatching, structure.rt, "SolveWord", dict())
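+# Illustrative sketch of how CompleteAnalogyTactic might be driven. The file
+# names are hypothetical, `structure` is assumed to be supplied by the caller
+# (it must provide the .ts/.rt/.NodeOfChunk interface used above), and
+# `empty_doc` stands for the currently-empty LazyTextDocument that Sifter
+# will fill in:
+#
+#   sources = [
+#       (LoadDocument("ex1_before.py"), LoadDocument("ex1_after.py")),
+#       (LoadDocument("ex2_before.py"), LoadDocument("ex2_after.py")),
+#       (LoadDocument("prompt_before.py"), empty_doc),
+#   ]
+#   CompleteAnalogyTactic(structure, sources)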
+
+def ExtendAnalogyTactic(structure, analogy):
+ """Tactic to extend an analogy involving source code.
+
+    See AnalogyUtils.md for the basic idea. This method fixes a particular
+    ordering of nodes in the breadth-first search. We first connect the
+    before-code to its documentation and after-code. Then we follow tokens
+    that are adjacent to each other. Finally, we look at any facts stating
+    that words are the same as each other. CrossDoc is used to identify
+    corresponding chunks across documents.
+ """
+ Fix(analogy.ExtendMap, ["/:TransformPair:Before", "/:Documentation:Code"])
+ Fix(analogy.ExtendMap, ["/:Follower:Before", "/:Follower:After"])
+ CrossDoc(structure, analogy)
+ Fix(analogy.ExtendMap, ["/:Follower:Before", "/:Follower:After"])
+ CrossDoc(structure, analogy)
+ Fix(analogy.ExtendMap, ["/:Follower:Before", "/:Follower:After"])
+ Fix(analogy.ExtendMap, ["/:Chunk", "/:SameWord"], no_follow=True)
+    # If we passed no_top=False, ExtendMap would map Documents:...:_IsMember
+    # before we start concretizing. But the concretization pass completely
+    # ignores _IsMember, so it would never concretize the contents. Basically,
+    # we want to deal with the TOP nodes later, when concretizing.
+ Fix(analogy.ExtendMap, no_follow=True, no_top=True)
+
+FOLLOW_RELATIONS = [
+ "/:Follower:Before", "/:Follower:After", "/:Document",
+ "/:TransformPair:After",
+]
+
+def CrossDoc(structure, analogy):
+ """Tactic to bootstrap an analogy on a new document.
+
+    The idea is that if two tokens have already been marked as corresponding
+    between Documents 0 and 2, we may want to use that information to mark two
+    other tokens as corresponding between Documents 1 and 3. We would usually
+    do this by following facts like 'SameWord'. But such facts may not be
+    unique (there could be 100 instances of the same token), so CrossDoc only
+    follows facts whose correspondence is unique. See AnalogyUtils.md for a
+    more complete description.
+ """
+ ts, rt = structure.ts, structure.rt
+ scope = ts.scope("/:Rules:TagCrossDoc:MustMap", protect=True)
+ no_slip = rt.ts.scope("/:Mapper:NoSlipRules", protect=True)
+
+ rule = SearchRules(rt, "Tag")[0]
+ new_partial = dict({
+ "/:Rules:TagCrossDoc:MustMap:MAlphaA": analogy.MAlphaA,
+ "/:Rules:TagCrossDoc:MustMap:MAlphaB": analogy.MAlphaB,
+ })
+ matcher = GetMatcher(rt, rule, new_partial)
+ matcher.sync()
+ found = defaultdict(set)
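+    # First pass: for each (ChunkA, ChunkMapA, DocA') triple the matcher
+    # finds, collect every candidate ChunkMapB it could be paired with.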
+ for assign in matcher.assignments():
+ assigned = assign.assignment
+        key = (assigned[scope[":ChunkA"]], assigned[scope[":ChunkMapA"]],
+               assigned[scope[":DocA'"]])
+ found[key].add(assigned[scope[":ChunkMapB"]])
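+    # Second pass: only extend the analogy along correspondences that are
+    # unique, i.e. triples with exactly one candidate ChunkMapB.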
+ for assign in matcher.assignments():
+ assigned = assign.assignment
+        key = (assigned[scope[":ChunkA"]], assigned[scope[":ChunkMapA"]],
+               assigned[scope[":DocA'"]])
+ if len(found[key]) != 1:
+ continue
+ analogy.ExtendMap(partial=dict({
+ no_slip[":A"]: assigned[scope[":ChunkA"]],
+ no_slip[":B"]: assigned[scope[":ChunkB"]],
+ no_slip[":MA"]: assigned[scope[":ChunkMapA"]],
+ no_slip[":MB"]: assigned[scope[":ChunkMapB"]],
+ no_slip[":C"]: assigned[scope[":ChunkType"]],
+ }))
+
+def AnalyzeCodelets(ts):
+ """Adds helper codelets to identify patterns used in the tactics above."""
+ mapper = MapperCodelet(ts)
+ ts.add_node("/:TransformPair:Before")
+ ts.add_node("/:TransformPair:After")
+ ts.add_node("/:Documentation:Code")
+ ts.add_node("/:Documentation:Docs")
+ with ts.scope("/:Rules:SameWord"):
+ with ts.scope(":MustMap") as exists:
+ ts[":MA"].map({ts[":A"]: ts[":Word"]})
+ ts[":MB"].map({ts[":B"]: ts[":Word"]})
+ ts[":IsWord"].map({ts[":Word"]: ts["/:Word"]})
+ ts[":AMember"].map({
+ ts[":A"]: ts["/:Chunk"],
+ ts[":DocA"]: ts["/:Document"],
+ })
+ ts[":BMember"].map({
+ ts[":B"]: ts["/:Chunk"],
+ ts[":DocB"]: ts["/:Document"],
+ })
+ ts[":AreRelated"].map({
+ ts[":DocA"]: ts[":Doc1Relation"],
+ ts[":DocB"]: ts[":Doc2Relation"],
+ })
+ with ts.scope(":NoMap:Insert"):
+ ts[":AreSame"].map({
+ exists[":A"]: ts["/:SameWord"],
+ exists[":B"]: ts["/:SameWord"],
+ })
+            # Some of these may be unnecessary; they also seem to slow things
+            # down. For the transformation ones we don't need relations within
+            # a doc.
+ ts[":??"].map({
+ ts[":_"]: ts["/RULE"],
+ exists[":AMember"]: ts["/MAYBE="],
+ exists[":BMember"]: ts["/MAYBE="],
+ exists[":AreRelated"]: ts["/MAYBE="],
+ })
+ ts[":??"].map({
+ ts[":_"]: ts["/RULE"],
+ exists[":DocA"]: ts["/MAYBE="],
+ exists[":DocB"]: ts["/MAYBE="],
+ })
+ ts[":??"].map({
+ ts[":_"]: ts["/RULE"],
+ exists[":Doc1Relation"]: ts["/MAYBE="],
+ exists[":Doc2Relation"]: ts["/MAYBE="],
+ })
+ RegisterRule(ts)
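+    # SolveWord rule (as read from the structure below): when chunk A is
+    # already mapped to a word and chunk B is marked SameWord with A, insert
+    # the corresponding mapping from B to that word.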
+ with ts.scope("/:Rules:SolveWord"):
+ with ts.scope(":MustMap") as exists:
+ ts[":MA"].map({ts[":A"]: ts[":Word"]})
+ ts[":IsWord"].map({ts[":Word"]: ts["/:Word"]})
+ ts[":AreSame"].map({
+ ts[":A"]: ts["/:SameWord"],
+ ts[":B"]: ts["/:SameWord"],
+ })
+ with ts.scope(":NoMap:Insert"):
+ ts[":MB"].map({exists[":B"]: exists[":Word"]})
+ RegisterRule(ts)
+
+ """Crossdoc tagger.
+
+    The idea here is to find a chunk ChunkA in document A and a chunk ChunkB
+    in document B which are currently mapped together. We then want to find
+    documents A', B' with chunks ChunkA' and ChunkB' such that A and A' are
+    related in the same way as B and B', and ChunkA and ChunkA' are related
+    in the same way as ChunkB and ChunkB'.
+
+    Furthermore, we don't want there to be any _other_ ChunkA', ChunkB' which
+    are related in that way. It doesn't seem possible to express that as a TS
+    rule with the current setup, so we enforce it later in the Python tactic
+    (see CrossDoc above).
+ """
+ mapper = ts.scope("/:Mapper", protect=True)
+ with ts.scope("/:Rules:TagCrossDoc"):
+ with ts.scope(":MustMap") as exists:
+ for x in ("A", "B"):
+ ts[f":IsAbstraction{x}"].map({
+ ts[f":MAlpha{x}"]: ts[mapper[":Abstraction"]]
+ })
+ for x in ("A", "A'", "B", "B'"):
+ ts[f":Doc{x}IsDoc"].map({
+ ts[f":Doc{x}"]: ts["/:Document"],
+ ts[f":Chunk{x}"]: ts["/:Chunk"],
+ })
+ for x in ("A", "B"):
+ ts[f":DocMap{x}"].map({
+ ts[f":Doc{x}"]: ts[":DocType"],
+ ts[f":Doc{x}'"]: ts[":Doc'Type"],
+ })
+ ts[f":ChunkMap{x}"].map({
+ ts[f":Chunk{x}"]: ts[":ChunkType"],
+ ts[f":Chunk{x}'"]: ts[":Chunk'Type"],
+ })
+ ts[f":MAlpha{x}"].map({
+ ts[f":Doc{x}"]: ts[":AlphaDoc"],
+ ts[f":Doc{x}'"]: ts[":AlphaDoc'"],
+ ts[f":Chunk{x}"]: ts[":AlphaChunk"],
+ })
+ RegisterRule(ts)
+ equivalence_classes = [
+ [":ChunkType", ":Chunk'Type"],
+ [":DocType", ":Doc'Type"],
+ [":DocA", ":DocB", ":DocA'", ":DocB'"],
+ [":DocMapA", ":DocMapB"],
+ [":DocAIsDoc", ":DocBIsDoc", ":DocA'IsDoc", ":DocB'IsDoc"],
+ [":ChunkA", ":ChunkB", ":ChunkA'", ":ChunkB'"],
+ ]
+ for equivalence_class in equivalence_classes:
+ equivalence_class = [exists[name] for name in equivalence_class]
+ AssertNodesEqual(ts, equivalence_class, "", equal_type="/MAYBE=")