Skip to content

Commit

Permalink
doc(dfg): a lot of additional documentation for dfg info
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce committed Dec 5, 2024
1 parent 4a75197 commit d1aa9f3
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 29 deletions.
49 changes: 42 additions & 7 deletions src/dataflow/environments/identifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@ export type Identifier = string & { __brand?: 'identifier' }

/**
* Each reference only has exactly one reference type, stored as the respective number.
* However, when checking we may want to allow for one of several types,
* However, when checking, we may want to allow for one of several types,
* allowing the combination of the respective bitmasks.
*
* Having reference types is important as R separates a variable definition from
* a function when resolving {@link Identifier|identifier}.
* In `c <- 3; print(c(1, 2))` the call to `c` works normally (as the vector constructor),
* while writing `c <- function(...) ..1` overshadows the built-in and causes `print` to only output the first element.
*
* @see {@link isReferenceType} - for checking if a (potentially joint) reference type contains a certain type
* @see {@link ReferenceTypeReverseMapping} - for debugging
*/
export enum ReferenceType {
/** The identifier type is unknown */
Expand All @@ -28,6 +36,7 @@ export enum ReferenceType {
BuiltInFunction = 128
}

/**
 * Reverse mapping of the reference types, so you can get the name from the bitmask (useful for debugging).
 *
 * `ReferenceType` is a numeric enum, hence the object produced for it contains both directions
 * (`Unknown -> 0` as well as `'0' -> 'Unknown'`). Only the `name -> number` entries are used here,
 * so the map holds exactly one entry per reference type and is keyed by the numeric value only
 * (without the filter, junk entries like `'Unknown' -> '0'` would end up in the map as well).
 */
export const ReferenceTypeReverseMapping = new Map<ReferenceType, string>(
	Object.entries(ReferenceType)
		/* drop the compiler-generated reverse entries whose value is the (string) name */
		.filter((entry): entry is [string, ReferenceType] => typeof entry[1] === 'number')
		.map(([name, value]): [ReferenceType, string] => [value, name])
);

/**
Expand All @@ -40,12 +49,24 @@ export function isReferenceType(t: ReferenceType, target: ReferenceType): boolea
/**
* Every {@link ReferenceType} except the two built-in ones
* ({@link ReferenceType.BuiltInConstant} and {@link ReferenceType.BuiltInFunction}).
*/
export type InGraphReferenceType = Exclude<ReferenceType, ReferenceType.BuiltInConstant | ReferenceType.BuiltInFunction>

/**
* Something like `a` in `b <- a`.
* Without any surrounding information, `a` will produce the identifier reference `a`.
* Similarly, `b` will create a reference.
* An identifier reference points to a variable like `a` in `b <- a`.
* Without any surrounding code, `a` will produce the identifier reference `a`.
* Similarly, `b` will create a reference (although it will be an {@link IdentifierDefinition|identifier definition}
* which adds even more information).
*
* In general,
* references are merely pointers (with meta-information) to a vertex in the {@link DataflowGraph|dataflow graph}.
* In the context of the extractor, for example,
* they indicate the references that are currently (during the analysis at this given node)
* {@link DataflowInformation#in|read (`in`)}, {@link DataflowInformation#out|written (`out`)},
* or {@link DataflowInformation#unknownReferences|unknown (`unknownReferences`)}.
*
* @see {@link InGraphIdentifierDefinition}
*/
export interface IdentifierReference {
/** Node which represents the reference in the AST */
/**
* The id of the node which represents the reference in the {@link NormalizedAst|normalized AST} and the {@link DataflowGraph|dataflow graph}.
*/
readonly nodeId: NodeId
/** Name the reference is identified by (e.g., the name of the variable), undefined if the reference is "artificial" (e.g., anonymous) */
readonly name: Identifier | undefined
Expand All @@ -58,14 +79,28 @@ export interface IdentifierReference {
controlDependencies: ControlDependency[] | undefined
}


/**
* The definition of an {@link Identifier|identifier} within the {@link DataflowGraph|graph}.
* This extends on the {@link IdentifierReference}
* by adding the {@link NodeId} of the definition
* (and using `type` to mark the object type).
*
* Within a code snippet like `a <- 3`, the symbol processor will first create an
* {@link IdentifierReference|identifier reference} for `a` to reference the use
* and then promote it to an {@link InGraphIdentifierDefinition|identifier definition}.
*
* @see {@link IdentifierReference}
*/
interface InGraphIdentifierDefinition extends IdentifierReference {
/** Marks the type of the defined object, restricted to {@link InGraphReferenceType} (i.e., never one of the built-in types) */
readonly type: InGraphReferenceType
/** The node which ultimately defined this identifier (e.g., the assignment or a call to a function like `assign`) */
readonly definedAt: NodeId
}

/**
* Stores the definition of an identifier within an {@link IEnvironment}
* Stores the definition of an identifier within an {@link IEnvironment}.
*
* {@link BuiltInIdentifierDefinition} and {@link BuiltInIdentifierConstant} are used for built-in functions and constants only,
* so the most important one for your day-to-day R script is the {@link InGraphIdentifierDefinition}.
*/
export type IdentifierDefinition = InGraphIdentifierDefinition | BuiltInIdentifierDefinition | BuiltInIdentifierConstant
12 changes: 11 additions & 1 deletion src/dataflow/graph/graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ export interface DataflowGraphJson {
readonly edgeInformation: [NodeId, [NodeId, DataflowGraphEdge][]][]
}

/**
* An unknown side effect describes something that we cannot handle correctly (in all cases).
* For example, `eval` will be marked as an unknown side effect as we have no idea of how it will affect the program.
* Linked side effects are used whenever we know that a call may be affected by another one in a way that we cannot
* grasp from the dataflow perspective (e.g., an indirect dependency based on the currently active graphic device).
*
* The type has two shapes: either just the {@link NodeId} of the node causing the side effect,
* or an object which carries that `id` together with a `linkTo` description (presumably the linked variant described above).
*
* NOTE(review): the name looks like a typo of `UnknownSideEffect`; it is kept as it is exported and may be imported elsewhere.
*/
export type UnknownSidEffect = NodeId | { id: NodeId, linkTo: LinkTo<RegExp> }

/**
Expand All @@ -124,7 +130,11 @@ export type UnknownSidEffect = NodeId | { id: NodeId, linkTo: LinkTo<RegExp> }
* However, this does not have to hold during the construction as edges may point from or to vertices which are yet to be constructed.
*
* All methods return the modified graph to allow for chaining.
* You can use {@link DataflowGraph#fromJson} to construct a dataflow graph object from a deserialized JSON object.
*
* @see {@link DataflowGraph#addEdge|`addEdge`} - to add an edge to the graph
* @see {@link DataflowGraph#addVertex|`addVertex`} - to add a vertex to the graph
* @see {@link DataflowGraph#fromJson|`fromJson`} - to construct a dataflow graph object from a deserialized JSON object.
* @see {@link emptyGraph} - to create an empty graph (useful in tests)
*/
export class DataflowGraph<
Vertex extends DataflowGraphVertexInfo = DataflowGraphVertexInfo,
Expand Down
48 changes: 41 additions & 7 deletions src/dataflow/graph/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export enum VertexType {
}

/**
* Arguments required to construct a vertex in the dataflow graph.
* Arguments required to construct a vertex in the {@link DataflowGraph|dataflow graph}.
*
* @see DataflowGraphVertexUse
* @see DataflowGraphVertexVariableDefinition
Expand All @@ -26,15 +26,17 @@ interface DataflowGraphVertexBase extends MergeableRecord {
*/
readonly tag: VertexType
/**
* The id of the node (the id assigned by the {@link ParentInformation} decoration)
* The id of the node (the id assigned by the {@link ParentInformation} decoration).
* This uniquely identifies the vertex in the {@link DataflowGraph|dataflow graph}
* as well as the corresponding {@link NormalizedAst|normalized AST}.
*/
id: NodeId
/**
* The environment in which the vertex is set.
*/
environment?: REnvironmentInformation | undefined
/**
* See {@link IdentifierReference}
* @see {@link ControlDependency} - the collection of control dependencies which have an influence on whether the vertex is executed.
*/
controlDependencies: ControlDependency[] | undefined
}
Expand All @@ -56,15 +58,20 @@ interface DataflowGraphVertexBase extends MergeableRecord {
* This then returns the corresponding node in the {@link NormalizedAst|normalized AST}, for example,
* an {@link RNumber} or {@link RString}.
*
* This works similarly for {@link IdentifierReference|reference} for which you can use the `id`.
* This works similarly for {@link IdentifierReference|identifier references}
* for which you can use the {@link IdentifierReference#nodeId|`nodeId`}.
*
* @see {@link isValueVertex} - to check if a vertex is a value vertex
*/
export interface DataflowGraphVertexValue extends DataflowGraphVertexBase {
/** Marks the vertex as a value vertex (this is what {@link isValueVertex} checks) */
readonly tag: VertexType.Value
/** A value vertex does not hold an environment, the property may only be omitted or `undefined` */
readonly environment?: undefined
}

/**
* Arguments required to construct a vertex which represents the usage of a variable in the dataflow graph.
* Arguments required to construct a vertex which represents the usage of a variable in the {@link DataflowGraph|dataflow graph}.
*
* @see {@link isUseVertex} - to check if a vertex is a use vertex
*/
export interface DataflowGraphVertexUse extends DataflowGraphVertexBase {
readonly tag: VertexType.Use
Expand All @@ -73,7 +80,9 @@ export interface DataflowGraphVertexUse extends DataflowGraphVertexBase {
}

/**
* Arguments required to construct a vertex which represents the usage of a variable in the dataflow graph.
* Arguments required to construct a vertex which represents a function call in the {@link DataflowGraph|dataflow graph}.
*
* @see {@link isFunctionCallVertex} - to check if a vertex is a function call vertex
*/
export interface DataflowGraphVertexFunctionCall extends DataflowGraphVertexBase {
readonly tag: VertexType.FunctionCall
Expand All @@ -93,14 +102,21 @@ export interface DataflowGraphVertexFunctionCall extends DataflowGraphVertexBase
}

/**
* Arguments required to construct a vertex which represents the definition of a variable in the dataflow graph.
* Arguments required to construct a vertex which represents the definition of a variable in the {@link DataflowGraph|dataflow graph}.
*
* @see {@link isVariableDefinitionVertex} - to check if a vertex is a variable definition vertex
*/
export interface DataflowGraphVertexVariableDefinition extends DataflowGraphVertexBase {
/** Marks the vertex as a variable definition (this is what {@link isVariableDefinitionVertex} checks) */
readonly tag: VertexType.VariableDefinition
/** Does not require an environment, those are attached to the call (so the property may only be omitted or `undefined`) */
readonly environment?: undefined
}

/**
* Arguments required to construct a vertex which represents the definition of a function in the {@link DataflowGraph|dataflow graph}.
*
* @see {@link isFunctionDefinitionVertex} - to check if a vertex is a function definition vertex
*/
export interface DataflowGraphVertexFunctionDefinition extends DataflowGraphVertexBase {
readonly tag: VertexType.FunctionDefinition
/**
Expand Down Expand Up @@ -130,25 +146,43 @@ export type DataflowGraphVertexArgument = DataflowGraphVertexUse | DataflowGraph
*/
export type DataflowGraphVertexInfo = Required<DataflowGraphVertexArgument>

/**
* A mapping of {@link NodeId}s to {@link DataflowGraphVertexInfo|vertices}.
*
* @typeParam Vertex - the type of the vertices stored in the map, defaults to {@link DataflowGraphVertexInfo}
*/
export type DataflowGraphVertices<Vertex extends DataflowGraphVertexInfo = DataflowGraphVertexInfo> = Map<NodeId, Vertex>


/**
 * Type guard which narrows the given vertex to a {@link DataflowGraphVertexValue|value vertex}.
 *
 * @param vertex - the vertex to inspect
 * @returns `true` if the vertex is tagged with {@link VertexType.Value}
 */
export function isValueVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexValue {
	const { tag } = vertex;
	return tag === VertexType.Value;
}

/**
 * Type guard which narrows the given vertex to a {@link DataflowGraphVertexUse|use vertex}.
 *
 * @param vertex - the vertex to inspect
 * @returns `true` if the vertex is tagged with {@link VertexType.Use}
 */
export function isUseVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexUse {
	const { tag } = vertex;
	return tag === VertexType.Use;
}

/**
 * Type guard which narrows the given vertex to a {@link DataflowGraphVertexFunctionCall|function call vertex}.
 *
 * @param vertex - the vertex to inspect
 * @returns `true` if the vertex is tagged with {@link VertexType.FunctionCall}
 */
export function isFunctionCallVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexFunctionCall {
	const { tag } = vertex;
	return tag === VertexType.FunctionCall;
}

/**
 * Type guard which narrows the given vertex to a {@link DataflowGraphVertexVariableDefinition|variable definition vertex}.
 *
 * @param vertex - the vertex to inspect
 * @returns `true` if the vertex is tagged with {@link VertexType.VariableDefinition}
 */
export function isVariableDefinitionVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexVariableDefinition {
	const { tag } = vertex;
	return tag === VertexType.VariableDefinition;
}

/**
 * Type guard which narrows the given vertex to a {@link DataflowGraphVertexFunctionDefinition|function definition vertex}.
 *
 * @param vertex - the vertex to inspect
 * @returns `true` if the vertex is tagged with {@link VertexType.FunctionDefinition}
 */
export function isFunctionDefinitionVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexFunctionDefinition {
	const { tag } = vertex;
	return tag === VertexType.FunctionDefinition;
}
Expand Down
95 changes: 82 additions & 13 deletions src/dataflow/info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,27 @@ import type { REnvironmentInformation } from './environments/environment';
import { DataflowGraph } from './graph/graph';
import type { GenericDifferenceInformation, WriteableDifferenceReport } from '../util/diff';


/**
* A control dependency links a vertex to the control flow element which
* may have an influence on its execution.
* Within `if(p) a else b`, `a` and `b` have a control dependency on the `if` (which in turn decides based on `p`).
*
* @see {@link happensInEveryBranch} - to check whether a list of control dependencies is exhaustive
*/
export interface ControlDependency {
/** The id of the node that causes the control dependency to be active (e.g., the condition of an if) */
readonly id: NodeId,
/**
* When does this control dependency trigger (i.e., if the condition is `true` or `false`)?
* NOTE(review): the meaning of an omitted `when` is not visible here, presumably the dependency is then not bound to a specific branch - confirm.
*/
readonly when?: boolean
}


/**
* Classifies the type of exit point encountered.
*
* @see {@link ExitPoint}
*/
export const enum ExitPointType {
/** The exit point is the implicit (last executed expression of a function/block) */
Default = 0,
Expand All @@ -16,23 +37,32 @@ export const enum ExitPointType {
Next = 3
}

export interface ControlDependency {
/** The id of the node that causes the control dependency to be active (e.g., the condition of an if) */
readonly id: NodeId,
/** when does this control dependency trigger (if the condition is true or false)? */
readonly when?: boolean
}


/**
* An exit point describes the position which ends the current control flow structure.
* This may be as innocent as the last expression or explicit with a `return`/`break`/`next`.
*
* @see {@link ExitPointType} - for the different types of exit points
* @see {@link addNonDefaultExitPoints} - to easily modify lists of exit points
* @see {@link alwaysExits} - to check whether a given dataflow information always exits (i.e., has a non-default exit point in every branch)
* @see {@link filterOutLoopExitPoints} - to remove loop exit points from a list
*/
export interface ExitPoint {
/** What kind of exit point is this one? May be used to filter for exit points of specific causes. */
readonly type: ExitPointType,
/** The id of the node which causes the exit point. */
readonly nodeId: NodeId,
/**
* Control dependencies which influence if the exit point triggers
* (e.g., if the `return` is contained within an `if` statement).
*
* @see {@link happensInEveryBranch} - to check whether control dependencies are exhaustive
*/
readonly controlDependencies: ControlDependency[] | undefined
}

/**
 * Appends every exit point of `add` which is not of type {@link ExitPointType.Default} to `existing`.
 * The `existing` list is modified in place, while `add` is only read.
 *
 * @param existing - the list of exit points to extend
 * @param add      - the candidates, of which only the non-default exit points are appended
 */
export function addNonDefaultExitPoints(existing: ExitPoint[], add: readonly ExitPoint[]): void {
	const nonDefault = add.filter(exitPoint => exitPoint.type !== ExitPointType.Default);
	existing.push(...nonDefault);
}
Expand All @@ -46,22 +76,50 @@ export interface DataflowCfgInformation {
}

/**
* The dataflow information is one of the fundamental structures we have in the dataflow analysis.
* It is continuously updated during the dataflow analysis
* and holds its current state for the respective subtree processed.
* Each processor during the dataflow analysis may use the information from its children
* to produce a new state of the dataflow information.
*
* You may initialize a new dataflow information with {@link initializeCleanDataflowInformation}.
*
* @see {@link DataflowCfgInformation} - the control flow aspects
*/
export interface DataflowInformation extends DataflowCfgInformation {
/**
* References that have not (yet) been identified as read or write; this is left to the higher processors.
*
* For example, when we analyze the `x` vertex in `x <- 3`, we will first create an unknown reference for `x`
* as we have not yet seen the assignment!
*
* @see {@link IdentifierReference} - a reference on a variable, parameter, function call, ...
*/
unknownReferences: readonly IdentifierReference[]
/**
* References which are read within the current subtree.
*
* @see {@link IdentifierReference} - a reference on a variable, parameter, function call, ...
*/
in: readonly IdentifierReference[]
/**
* References which are written to within the current subtree.
*
* @see {@link IdentifierReference} - a reference on a variable, parameter, function call, ...
*/
out: readonly IdentifierReference[]
/** Current environments used for name resolution, probably updated on the next expression-list processing */
environment: REnvironmentInformation
/** The current constructed dataflow graph */
graph: DataflowGraph
}

/**
* Initializes an empty {@link DataflowInformation} object with the given entry point and data.
* This is to be used as a "starting point" when processing leaf nodes during the dataflow extraction.
*
* @see {@link DataflowInformation}
*/
export function initializeCleanDataflowInformation<T>(entryPoint: NodeId, data: Pick<DataflowProcessorInformation<T>, 'environment' | 'completeAst'>): DataflowInformation {
return {
unknownReferences: [],
Expand All @@ -74,6 +132,10 @@ export function initializeCleanDataflowInformation<T>(entryPoint: NodeId, data:
};
}

/**
* Checks whether the given control dependencies are exhaustive (i.e. if for every control dependency on a boolean,
* the list contains a dependency on the `true` and on the `false` case).
*/
export function happensInEveryBranch(controlDependencies: readonly ControlDependency[] | undefined): boolean {
if(controlDependencies === undefined) {
/* the cds are unconstrained */
Expand All @@ -97,12 +159,19 @@ export function happensInEveryBranch(controlDependencies: readonly ControlDepend
return trues.every(id => falseSet.has(id));
}

/**
 * Checks whether the given dataflow information always exits,
 * i.e., whether it holds at least one non-default exit point that happens in every branch.
 *
 * @param data - the dataflow information whose exit points are inspected
 * @returns `true` if such an exit point exists, `false` otherwise (also if there are no exit points at all)
 *
 * @see {@link ExitPoint} - for the different types of exit points
 * @see {@link happensInEveryBranch} - used to check the control dependencies of each exit point
 */
export function alwaysExits(data: DataflowInformation): boolean {
	const exitPoints = data.exitPoints;
	if(exitPoints === undefined || exitPoints === null) {
		/* without exit points there is nothing that could force an exit */
		return false;
	}
	return exitPoints.some(({ type, controlDependencies }) =>
		type !== ExitPointType.Default && happensInEveryBranch(controlDependencies)
	);
}

/**
 * Filters out exit points which end their cascade within a loop.
 * Only exit points of type {@link ExitPointType.Return} and {@link ExitPointType.Default} are retained,
 * all other types (e.g., {@link ExitPointType.Next}) are dropped.
 *
 * @param exitPoints - the exit points to filter (not modified)
 * @returns a new list with the retained exit points in their original order
 */
export function filterOutLoopExitPoints(exitPoints: readonly ExitPoint[]): readonly ExitPoint[] {
	return exitPoints.filter(exitPoint => {
		const kind = exitPoint.type;
		return kind === ExitPointType.Default || kind === ExitPointType.Return;
	});
}
Expand Down
Loading

0 comments on commit d1aa9f3

Please sign in to comment.