Skip to content

Commit

Permalink
feat(nested-list): recursively resolve nested access
Browse files Browse the repository at this point in the history
  • Loading branch information
Slartibartfass2 committed Dec 13, 2024
1 parent c1b9327 commit c578925
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 73 deletions.
42 changes: 40 additions & 2 deletions src/dataflow/graph/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,57 @@ export enum VertexType {
FunctionDefinition = 'function-definition'
}

/**
* A single index of a container, which is not a container itself.
*
* This can be e.g. a string, number or boolean index.
*/
export interface ContainerLeafIndex {
/**
* Distinctive lexeme of the index, e.g. 'name' for `list(name = 'John')`.
*/
readonly lexeme: string,

/**
* NodeId of the index in the graph.
*/
readonly nodeId: NodeId,
}

/**
 * A single index of a container, which is a container itself.
 *
 * This can be e.g. a list, vector or data frame.
 */
export interface ContainerParentIndex extends ContainerLeafIndex {
	/**
	 * Sub-indices of this index, kept as one {@link ContainerIndices} entry per group of indices.
	 */
	readonly subIndices: ContainerIndices[],
}

/**
* A single index of a container: either a leaf index or a parent index that carries sub-indices.
*/
export type ContainerIndex = ContainerLeafIndex | ContainerParentIndex;

/**
* List of indices of a single statement.
*/
export interface ContainerIndices {
readonly indices: ContainerIndex[],
/**
* Differentiate between single and multiple indices.
*
* For `list(name = 'John')` `isSingleIndex` would be false, because a list may define more than one index
* (a list with one index is not a single index).
* `isSingleIndex` is true for e.g. single index assignments like `person$name <- 'John'`.
*/
readonly isSingleIndex: boolean,
}

/**
* Collection of the indices of several statements (one {@link ContainerIndices} entry per statement).
*
* The type also admits `undefined` in place of a list.
*/
export type ContainerIndicesCollection = ContainerIndices[] | undefined

/**
Expand Down
125 changes: 102 additions & 23 deletions src/dataflow/internal/process/functions/call/built-in/built-in-access.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,18 @@ import type { NodeId } from '../../../../../../r-bridge/lang-4.x/ast/model/proce
import { dataflowLogger } from '../../../../../logger';
import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type';
import { EdgeType } from '../../../../../graph/edge';
import type { REnvironmentInformation } from '../../../../../environments/environment';
import { makeAllMaybe, makeReferenceMaybe } from '../../../../../environments/environment';
import type { ForceArguments } from '../common';
import { BuiltIn } from '../../../../../environments/built-in';
import { markAsAssignment } from './built-in-assignment';
import { ReferenceType } from '../../../../../environments/identifier';
import type { InGraphIdentifierDefinition } from '../../../../../environments/identifier';
import { resolveByName } from '../../../../../environments/resolve-by-name';
import type { ContainerIndex, ContainerIndicesCollection, ContainerParentIndex } from '../../../../../graph/vertex';
import type { ContainerIndicesCollection, ContainerParentIndex } from '../../../../../graph/vertex';
import type { RArgument } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-argument';
import { RoleInParent } from '../../../../../../r-bridge/lang-4.x/ast/model/processing/role';
import type { Base } from '../../../../../../r-bridge/lang-4.x/ast/model/model';

interface TableAssignmentProcessorMarker {
definitionRootNodes: NodeId[]
Expand Down Expand Up @@ -122,7 +125,7 @@ function processNumberBasedAccess<OtherInfo>(
name: RSymbol<OtherInfo & ParentInformation, string>,
args: readonly RFunctionArgument<OtherInfo & ParentInformation>[],
rootId: NodeId,
config: { treatIndicesAsString: boolean; } & ForceArguments,
config: ForceArguments,
head: RArgument<OtherInfo & ParentInformation>,
) {
const existing = data.environment.current.memory.get(':=');
Expand Down Expand Up @@ -165,7 +168,7 @@ function processStringBasedAccess<OtherInfo>(
data: DataflowProcessorInformation<OtherInfo & ParentInformation>,
name: RSymbol<OtherInfo & ParentInformation, string>,
rootId: NodeId,
config: { treatIndicesAsString: boolean; } & ForceArguments,
config: ForceArguments,
) {
const newArgs = [...args];
// if the argument is a symbol, we convert it to a string for this perspective
Expand All @@ -188,41 +191,117 @@ function processStringBasedAccess<OtherInfo>(
}
}

const fnCall = processKnownFunctionCall({ name, args: newArgs, rootId, data, forceArgs: config.forceArgs });

// Resolve access on the way up the fold
const nonEmptyArgs = newArgs.filter(arg => arg !== EmptyArgument);
const accessedArg = nonEmptyArgs.find(arg => arg.info.role === RoleInParent.Accessed);
const accessArg = nonEmptyArgs.find(arg => arg.info.role === RoleInParent.IndexAccess);

if(accessedArg === undefined || accessArg === undefined) {
return fnCall;
}

let accessedIndicesCollection: ContainerIndicesCollection;
if(newArgs[0] !== EmptyArgument) {
const accessArg = newArgs[1] === EmptyArgument ? undefined : newArgs[1].lexeme;
const resolvedFirstParameter = resolveByName(newArgs[0].lexeme ?? '', data.environment);
const indicesCollection = resolvedFirstParameter?.flatMap(param => (param as InGraphIdentifierDefinition)?.indicesCollection ?? []);
for(const indices of indicesCollection ?? []) {
const filteredIndices = indices.indices.filter(index => index.lexeme === accessArg);
if(filteredIndices.length == 0) {
continue;
}
accessedIndicesCollection ??= [];
accessedIndicesCollection.push({
indices: filteredIndices,
isSingleIndex: indices.isSingleIndex
});
// If the accessedArg is a symbol, it's either a simple access or the base case of a nested access
if(accessedArg.value?.type === RType.Symbol) {
accessedIndicesCollection = resolveSingleIndex(accessedArg, accessArg, data.environment);
} else {
// Higher access call
const underlyingAccessId = accessedArg.value?.info.id ?? -1;
const vertex = fnCall.information.graph.getVertex(underlyingAccessId);
const subIndices = vertex?.indicesCollection
?.flatMap(indices => indices.indices)
?.flatMap(index => (index as ContainerParentIndex)?.subIndices ?? []);
if(subIndices) {
accessedIndicesCollection = filterIndices(subIndices, accessArg);
}
}

// Add indices to vertex afterwards
if(accessedIndicesCollection) {
const vertex = fnCall.information.graph.getVertex(rootId);
if(vertex) {
vertex.indicesCollection = accessedIndicesCollection;
}

// When access has no access as parent, it's the top most
const rootNode = data.completeAst.idMap.get(rootId);
const parentNode = data.completeAst.idMap.get(rootNode?.info.parent ?? -1);
if(parentNode?.type !== RType.Access) {
// Only reference indices in top most access
referenceIndices(accessedIndicesCollection, fnCall, name.info.id);
}
}

const fnCall = processKnownFunctionCall({ name, args: newArgs, rootId, data, forceArgs: config.forceArgs }, accessedIndicesCollection);
const accessedIndices = accessedIndicesCollection?.flatMap(indices => indices.indices);
referenceIndices(accessedIndices, fnCall, name.info.id);
return fnCall;
}

/**
 * Looks up {@link accessedArg} by its lexeme in the {@link environment}, gathers the indices attached to
 * the resolved definitions and keeps only those selected by {@link accessArg}.
 *
 * @param accessedArg - The argument to resolve
 * @param accessArg - The argument which is used to filter the indices
 * @param environment - The environment in which {@link accessedArg} is resolved
 * @returns The filtered {@link ContainerIndicesCollection} of the resolved {@link accessedArg}
 */
function resolveSingleIndex<OtherInfo>(
	accessedArg: RArgument<OtherInfo & ParentInformation>,
	accessArg: Base<OtherInfo & ParentInformation>,
	environment: REnvironmentInformation,
): ContainerIndicesCollection {
	// Definitions without an indices collection contribute nothing to the flattened list
	const knownIndices = resolveByName(accessedArg.lexeme, environment)
		?.flatMap(definition => (definition as InGraphIdentifierDefinition)?.indicesCollection ?? []);
	return filterIndices(knownIndices, accessArg);
}

/**
 * Narrows every entry of the {@link indicesCollection} down to the indices whose lexeme equals the lexeme
 * of the {@link accessArg}. Entries that end up without any matching index are dropped.
 *
 * @param indicesCollection - The {@link ContainerIndicesCollection} to filter
 * @param accessArg - The argument which is used to filter {@link indicesCollection}
 * @returns A filtered copy of {@link indicesCollection}, or `undefined` if no index matched at all
 */
function filterIndices<OtherInfo>(
	indicesCollection: ContainerIndicesCollection,
	accessArg: Base<OtherInfo & ParentInformation>,
): ContainerIndicesCollection {
	const matchingEntries = (indicesCollection ?? [])
		.map(({ indices, isSingleIndex }) => ({
			indices: indices.filter(candidate => candidate.lexeme === accessArg.lexeme),
			isSingleIndex,
		}))
		.filter(entry => entry.indices.length > 0);

	// An empty result is reported as `undefined`, never as an empty list
	return matchingEntries.length > 0 ? matchingEntries : undefined;
}

/**
 * Creates edges of type {@link EdgeType.Reads} to the accessed indices and, recursively, to their
 * sub-indices, starting from the node with {@link parentNodeId}.
 *
 * @param accessedIndicesCollection - All indices that were accessed by the access operation
 * @param fnCall - Result of the processed call; the edges are added to the graph of its information
 * @param parentNodeId - {@link NodeId} of the parent from which the edge starts
 */
function referenceIndices(
	accessedIndicesCollection: ContainerIndicesCollection,
	fnCall: ProcessKnownFunctionCallResult,
	parentNodeId: NodeId,
) {
	const accessedIndices = accessedIndicesCollection?.flatMap(indices => indices.indices);

	for(const accessedIndex of accessedIndices ?? []) {
		fnCall.information.graph.addEdge(parentNodeId, accessedIndex.nodeId, EdgeType.Reads);
		// Only parent indices carry sub-indices; for leaf indices the recursion receives `undefined` and stops
		const accessedSubIndices = ('subIndices' in accessedIndex) ? accessedIndex.subIndices : undefined;
		referenceIndices(accessedSubIndices, fnCall, accessedIndex.nodeId);
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,13 @@ export function processList<OtherInfo>(
// Check whether argument value is non-primitive
if(arg.value?.type === RType.Symbol) {
const defs = resolveByName(arg.value.lexeme, data.environment);
const indices = defs
?.flatMap(index => (index as InGraphIdentifierDefinition).indicesCollection ?? [])
.flatMap(indices => indices.indices);
// console.log('indices of', arg.value.lexeme, ':', indices);
newIndex = {
...newIndex,
subIndices: indices,
};
const indices = defs?.flatMap(index => (index as InGraphIdentifierDefinition).indicesCollection ?? []);
if(indices) {
newIndex = {
...newIndex,
subIndices: indices,
};
}
}

namedArguments.push(newIndex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ export function markNonStandardEvaluationEdges(
}

export function processKnownFunctionCall<OtherInfo>(
{ name,args, rootId,data, reverseOrder = false, markAsNSE = undefined, forceArgs, patchData = d => d, hasUnknownSideEffect }: ProcessKnownFunctionCallInput<OtherInfo>, indicesCollection: ContainerIndicesCollection = undefined,
{ name, args, rootId, data, reverseOrder = false, markAsNSE = undefined, forceArgs, patchData = d => d, hasUnknownSideEffect }: ProcessKnownFunctionCallInput<OtherInfo>, indicesCollection: ContainerIndicesCollection = undefined,
): ProcessKnownFunctionCallResult {
const functionName = processDataflowFor(name, data);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,19 @@ import { label } from '../../_helper/label';

// Dataflow test: accessing a named list argument via `$` must yield a reads edge to that argument.
describe.sequential('List Access', withShell(shell => {
	describe('Access named argument', () => {
		assertDataflow(
			label('Assert reads edge to named argument', ['name-normal', 'function-calls', 'named-arguments', 'dollar-access', 'subsetting']),
			shell,
			`person <- list(age = 24, name = "John")
person$name`,
			emptyGraph()
				.defineVariable('1@person')
				.reads('2@person', '1@person')
				.reads('2@$', '7'),
			{
				expectIsSubgraph:      true,
				resolveIdsAsCriterion: true,
			}
		);
	});
}));
Loading

0 comments on commit c578925

Please sign in to comment.