"""Helper methods for analyzing program source code.""" from collections import defaultdict from tactic_utils import ApplyRulesMatching, SearchRules, GetMatcher, Fix from mapper import MapperCodelet from lazy_structure import LazyTextDocument, SPECIAL_CHARACTERS from ts_utils import RegisterRule, AssertNodesEqual from analogy_utils import Analogy def LoadDocument(path, extra_chunks=None, extra_special=None): """Load a document from the file system into a LazyTextDocument. @extra_chunks: list of ranges of characters in the document that should be chunked together. @extra_special: list of special tokens that should be chunked together. """ extra_special = extra_special or [] with open(path, "r") as in_file: text = in_file.readlines() chunks = [] for start, length in (extra_chunks or []): chunks.append("".join(text[start:(start + length)]).lstrip(" ")) text = "".join(text) return LazyTextDocument(text, chunks + extra_special + SPECIAL_CHARACTERS) def CompleteAnalogyTactic(structure, sources): """Heuristic for completing an analogy involving code transformations. @sources should be a list of before/after LazyTextDocuments. @sources[0][0] and @sources[1][0] should be the before-code of @sources[0][1] and @sources[1][1] respectively. @sources[2][0] should be the prompt before code, and @sources[2][1] should be a currently-empty LazyTextDocument to be generated by Sifter. """ structure.ts.commit() print("Identifying word pairs...") ApplyRulesMatching(structure.rt, "SameWord", dict()) print("Mapping the examples...") analogy = Analogy.Begin(structure.rt, dict({ "/:Mapper:NoSlipRules:A": structure.NodeOfChunk(sources[0][0], sources[0][0].chunks[0]), "/:Mapper:NoSlipRules:B": structure.NodeOfChunk(sources[1][0], sources[1][0].chunks[0]), "/:Mapper:NoSlipRules:C": "/:Chunk", }), extend_here=["/:Document"]) ExtendAnalogyTactic(structure, analogy) print("Mapping the prompt...") analogy = Analogy.Begin(structure.rt, dict({ "/:Mapper:NoSlipRules:A": structure.NodeOfChunk(sources[0][0], sources[0][0].chunks[0]), "/:Mapper:NoSlipRules:B": structure.NodeOfChunk(sources[2][0], sources[2][0].chunks[0]), "/:Mapper:NoSlipRules:C": "/:Chunk", }), exists=True, extend_here=["/:Document"]) ExtendAnalogyTactic(structure, analogy) print("Solving the prompt...") analogy.state = "concretize" Fix(analogy.ExtendMap, ["/:Document"]) Fix(analogy.ExtendMap) Fix(analogy.ExtendMap, no_follow=True) Fix(ApplyRulesMatching, structure.rt, "SolveWord", dict()) def ExtendAnalogyTactic(structure, analogy): """Tactic to extend an analogy involving source code. See AnalogyUtils.md for the basic idea. This method describes a certain ordering of nodes in the breadth-first-search. We first connect the before-code to its documentation and after-code. Then we follow tokens that are adjacent to each other. Finally, we look at any facts about words being the same as each other. CrossDoc is used to identify associated chunks across documents. """ Fix(analogy.ExtendMap, ["/:TransformPair:Before", "/:Documentation:Code"]) Fix(analogy.ExtendMap, ["/:Follower:Before", "/:Follower:After"]) CrossDoc(structure, analogy) Fix(analogy.ExtendMap, ["/:Follower:Before", "/:Follower:After"]) CrossDoc(structure, analogy) Fix(analogy.ExtendMap, ["/:Follower:Before", "/:Follower:After"]) Fix(analogy.ExtendMap, ["/:Chunk", "/:SameWord"], no_follow=True) # If we have no_top=False, then it will map Documents:...:_IsMember before # we start concretizing. But then the concretization pass will completely # ignore _IsMember, so it will never concretize the contents. Basically, we # just want to deal with the TOP nodes later, when concretizing. Fix(analogy.ExtendMap, no_follow=True, no_top=True) FOLLOW_RELATIONS = [ "/:Follower:Before", "/:Follower:After", "/:Document", "/:TransformPair:After", ] def CrossDoc(structure, analogy): """Tactic to bootstrap an analogy on a new document. Basically, the idea here is if you've already marked two tokens as corresponding between Documents 0 and 2, we may want to use that information to mark two different tokens as corresponding in Document 1 and 3. We would usually want to do this by following facts like 'SameWord.' But these facts may not be unique (there could be 100 instances of the same token), so CrossDoc looks to only follow the facts that are unique. See AnalogyUtils.md for a more complete description. """ ts, rt = structure.ts, structure.rt scope = ts.scope("/:Rules:TagCrossDoc:MustMap", protect=True) no_slip = rt.ts.scope("/:Mapper:NoSlipRules", protect=True) rule = SearchRules(rt, "Tag")[0] new_partial = dict({ "/:Rules:TagCrossDoc:MustMap:MAlphaA": analogy.MAlphaA, "/:Rules:TagCrossDoc:MustMap:MAlphaB": analogy.MAlphaB, }) matcher = GetMatcher(rt, rule, new_partial) matcher.sync() found = defaultdict(set) for assign in matcher.assignments(): assigned = assign.assignment key = (assigned[scope[":ChunkA"]], assigned[scope[":ChunkMapA"]], assigned[scope[":DocA'"]]) found[key].add(assigned[scope[":ChunkMapB"]]) for assign in matcher.assignments(): assigned = assign.assignment key = (assigned[scope[":ChunkA"]], assigned[scope[":ChunkMapA"]], assigned[scope[":DocA'"]]) if len(found[key]) != 1: continue analogy.ExtendMap(partial=dict({ no_slip[":A"]: assigned[scope[":ChunkA"]], no_slip[":B"]: assigned[scope[":ChunkB"]], no_slip[":MA"]: assigned[scope[":ChunkMapA"]], no_slip[":MB"]: assigned[scope[":ChunkMapB"]], no_slip[":C"]: assigned[scope[":ChunkType"]], })) def AnalyzeCodelets(ts): """Adds helper codelets to identify patterns used in the tactics above.""" mapper = MapperCodelet(ts) ts.add_node("/:TransformPair:Before") ts.add_node("/:TransformPair:After") ts.add_node("/:Documentation:Code") ts.add_node("/:Documentation:Docs") with ts.scope("/:Rules:SameWord"): with ts.scope(":MustMap") as exists: ts[":MA"].map({ts[":A"]: ts[":Word"]}) ts[":MB"].map({ts[":B"]: ts[":Word"]}) ts[":IsWord"].map({ts[":Word"]: ts["/:Word"]}) ts[":AMember"].map({ ts[":A"]: ts["/:Chunk"], ts[":DocA"]: ts["/:Document"], }) ts[":BMember"].map({ ts[":B"]: ts["/:Chunk"], ts[":DocB"]: ts["/:Document"], }) ts[":AreRelated"].map({ ts[":DocA"]: ts[":Doc1Relation"], ts[":DocB"]: ts[":Doc2Relation"], }) with ts.scope(":NoMap:Insert"): ts[":AreSame"].map({ exists[":A"]: ts["/:SameWord"], exists[":B"]: ts["/:SameWord"], }) # Some of these may be unnecessary, they seem to slow things down as # well. For the transformation ones we don't need relations within a # doc. ts[":??"].map({ ts[":_"]: ts["/RULE"], exists[":AMember"]: ts["/MAYBE="], exists[":BMember"]: ts["/MAYBE="], exists[":AreRelated"]: ts["/MAYBE="], }) ts[":??"].map({ ts[":_"]: ts["/RULE"], exists[":DocA"]: ts["/MAYBE="], exists[":DocB"]: ts["/MAYBE="], }) ts[":??"].map({ ts[":_"]: ts["/RULE"], exists[":Doc1Relation"]: ts["/MAYBE="], exists[":Doc2Relation"]: ts["/MAYBE="], }) RegisterRule(ts) with ts.scope("/:Rules:SolveWord"): with ts.scope(":MustMap") as exists: ts[":MA"].map({ts[":A"]: ts[":Word"]}) ts[":IsWord"].map({ts[":Word"]: ts["/:Word"]}) ts[":AreSame"].map({ ts[":A"]: ts["/:SameWord"], ts[":B"]: ts["/:SameWord"], }) with ts.scope(":NoMap:Insert"): ts[":MB"].map({exists[":B"]: exists[":Word"]}) RegisterRule(ts) """Crossdoc tagger. The idea here is to find chunks ChunkA in document A and ChunkB in document B which are currently mapped together. We then want to find documents A', B' with chunks ChunkA' and ChunkB' such that A and A' are related in the same way as B and B', and ChunkA and ChunkA' are related in the same ways as ChunkB and ChunkB'. Furthermore, we don't want there to be any _other_ ChunkA', ChunkB' which are related in that way. I don't think it's actually possible to express that as a TS rule using the current set up, so we'll enforce that later in the Python tactic. """ mapper = ts.scope("/:Mapper", protect=True) with ts.scope("/:Rules:TagCrossDoc"): with ts.scope(":MustMap") as exists: for x in ("A", "B"): ts[f":IsAbstraction{x}"].map({ ts[f":MAlpha{x}"]: ts[mapper[":Abstraction"]] }) for x in ("A", "A'", "B", "B'"): ts[f":Doc{x}IsDoc"].map({ ts[f":Doc{x}"]: ts["/:Document"], ts[f":Chunk{x}"]: ts["/:Chunk"], }) for x in ("A", "B"): ts[f":DocMap{x}"].map({ ts[f":Doc{x}"]: ts[":DocType"], ts[f":Doc{x}'"]: ts[":Doc'Type"], }) ts[f":ChunkMap{x}"].map({ ts[f":Chunk{x}"]: ts[":ChunkType"], ts[f":Chunk{x}'"]: ts[":Chunk'Type"], }) ts[f":MAlpha{x}"].map({ ts[f":Doc{x}"]: ts[":AlphaDoc"], ts[f":Doc{x}'"]: ts[":AlphaDoc'"], ts[f":Chunk{x}"]: ts[":AlphaChunk"], }) RegisterRule(ts) equivalence_classes = [ [":ChunkType", ":Chunk'Type"], [":DocType", ":Doc'Type"], [":DocA", ":DocB", ":DocA'", ":DocB'"], [":DocMapA", ":DocMapB"], [":DocAIsDoc", ":DocBIsDoc", ":DocA'IsDoc", ":DocB'IsDoc"], [":ChunkA", ":ChunkB", ":ChunkA'", ":ChunkB'"], ] for equivalence_class in equivalence_classes: equivalence_class = [exists[name] for name in equivalence_class] AssertNodesEqual(ts, equivalence_class, "", equal_type="/MAYBE=")