Skip to content

Commit

Permalink
0.5.1 - Fix 'switch' statement decompilation. Start work on 'if' stat…
Browse files Browse the repository at this point in the history
…ements.
  • Loading branch information
TeamworkGuy2 committed Dec 6, 2020
1 parent 2497bc4 commit 3653372
Show file tree
Hide file tree
Showing 14 changed files with 488 additions and 173 deletions.
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,19 @@ This project does its best to adhere to [Semantic Versioning](http://semver.org/


--------
### [0.5.0](N/A) - 2020-12-05
### [0.5.1](N/A) - 2020-12-05
__Fix `switch` statements to decompile much more accurately based on code flow analysis. Start work on `if` statements.__
#### Added
* new `Indent` class to handle `SourceWriter` indentation
* `Switch` to handle switch code flow initialization and info

#### Changed
* Add code flow analysis initialization and info to `JumpConditionInfo`
* Assume that a forward goto right before a condition is an `else` statement


--------
### [0.5.0](https://github.com/TeamworkGuy2/ClassLoading/commit/2497bc4caaa27e6574afad64cace3475238da9f2) - 2020-12-05
__Decompilation to source code in-progress and first round trip compile/decompile unit tests__
#### Added
* A new `twg2.jbcm.ir` package with helper classes for tracking state and data related to decompilation
Expand Down
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,23 @@ See the `twg2.jbcm.main.UsageCliMain` class for a simple command line interface

Reference: [Java Virtual Machine Spec (Java 9)](https://docs.oracle.com/javase/specs/jvms/se9/html/index.html)

### `twg2.jbcm.classFormat`
Packages:
### twg2.jbcm.classFormat
Contains implementation of the [class file format](https://docs.oracle.com/javase/specs/jvms/se9/html/jvms-4.html)
with related attributes (`twg2.jbcm.classFormat.attributes`) and constant pool types (`twg2.jbcm.classFormat.constantPool`).

### `twg2.jbcm` and `twg2.jbcm.modify`
### twg2.jbcm & twg2.jbcm.modify
Interfaces and utilities for searching and modifying class files.

### `twg2.jbcm`
### twg2.jbcm
Utilities and the `Opcodes` enum containing detailed, programmatic information about the [Java instruction set opcodes](https://docs.oracle.com/javase/specs/jvms/se9/html/jvms-6.html#jvms-6.5).
Also see the [extract-opcodes.js] file for how the enum literals in `Opcodes` are generated.
Also see [extract-opcodes.js](extract-opcodes.js) file for how the enum literals in `Opcodes` are generated.

### `twg2.jbcm.dynamicModification` and `twg2.jbcm.parserExamples`
### twg2.jbcm.dynamicModification & twg2.jbcm.parserExamples
Classes used by the example and test packages.

### `twg2.jbcm.runtimeLoading`
### twg2.jbcm.runtimeLoading
Runtime class loading.

### `twg2.jbcm.main`
### twg2.jbcm.main
Example console apps.
2 changes: 1 addition & 1 deletion package-lib.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version" : "0.5.0",
"version" : "0.5.1",
"name" : "class-loading",
"description" : "Java class file parsing, manipulation, and to human readable representation",
"homepage" : "https://github.com/TeamworkGuy2/ClassLoading",
Expand Down
14 changes: 14 additions & 0 deletions src/twg2/jbcm/CodeFlow.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ public static IntArrayList getFlowPaths(int idx, byte[] instr, IntArrayList dstP
dstPath.add(~i);
}
int jumpDst = opc.getJumpDestination(instr, i);
if(jumpDst < 0) {
jumpDst = opc.getJumpDestination(instr, i);
}
getFlowPaths(jumpDst, instr, dstPath);

// end this code path if the jump path is unconditional (i.e. GOTO or JSR)
Expand All @@ -52,6 +55,17 @@ else if(opc.hasBehavior(Type.RETURN) || opc == Opcodes.ATHROW) {
}


/** Find the largest instruction index recorded in a code flow path.
 * Negative entries are treated as bitwise-complemented indices and are flipped back ({@code ~entry}) before comparing.
 * @param codeFlow the code flow path entries to scan
 * @return the largest decoded index, or {@code -1} if {@code codeFlow} is empty
 */
public static int maxIndex(IntListReadOnly codeFlow) {
    int highest = -1;
    final int count = codeFlow.size();
    for(int pos = 0; pos < count; pos++) {
        int entry = codeFlow.get(pos);
        // decode complemented (negative) entries back to their non-negative index
        int decoded = entry < 0 ? ~entry : entry;
        if(decoded > highest) {
            highest = decoded;
        }
    }

    return highest;
}


public static String flowPathToString(byte[] instr, IntListReadOnly codeFlow) {
var sb = new StringBuilder();
for(int i = 0, size = codeFlow.size(); i < size; i++) {
Expand Down
9 changes: 6 additions & 3 deletions src/twg2/jbcm/IoUtility.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public static void writeShort(short value, byte[] b, int offset) {
* a long by {@code (b[offset] << 56) | (b[offset+1] << 48) | ... | (b[offset+6] << 8) | b[offset+7]}
*/
public static long readLong(byte[] b, int offset) {
return ((long)b[offset] << 56) |
return ((long)(b[offset] & 0xFF) << 56) |
((long)(b[offset+1] & 0xFF) << 48) |
((long)(b[offset+2] & 0xFF) << 40) |
((long)(b[offset+3] & 0xFF) << 32) |
Expand All @@ -81,7 +81,10 @@ public static long readLong(byte[] b, int offset) {
* an integer by {@code (b[offset] << 24) | (b[offset+1] << 16) | (b[offset+2] << 8) | b[offset+3]}
*/
public static int readInt(byte[] b, int offset) {
    // Java promotes each byte to a sign-extended int before shifting/OR-ing, so every byte
    // must be masked with 0xFF; otherwise any byte >= 0x80 fills the higher bits with 1s
    // and corrupts the bytes OR-ed in before it.
    return ((b[offset] & 0xFF) << 24) |
            ((b[offset + 1] & 0xFF) << 16) |
            ((b[offset + 2] & 0xFF) << 8) |
            (b[offset + 3] & 0xFF);
}


Expand All @@ -92,7 +95,7 @@ public static int readInt(byte[] b, int offset) {
* a short by {@code (b[offset] << 8) | b[offset+1]}
*/
public static short readShort(byte[] b, int offset) {
    // mask both bytes: an unmasked low byte >= 0x80 is sign-extended during int promotion
    // and would overwrite the high byte with 1 bits
    int high = b[offset] & 0xFF;
    int low = b[offset + 1] & 0xFF;
    return (short)((high << 8) | low);
}


Expand Down
71 changes: 65 additions & 6 deletions src/twg2/jbcm/ir/JumpConditionInfo.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package twg2.jbcm.ir;

import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

import twg2.collections.primitiveCollections.IntArrayList;
import twg2.jbcm.CodeFlow;
import twg2.jbcm.Opcodes;

/** Contains the beginning and end opcodes and targets for a goto/if condition pair
Expand All @@ -9,14 +14,18 @@
public class JumpConditionInfo {
private final Opcodes opc;
private final int opcIdx;
private final int targetOffsetIdx;
private final int targetOffset;
private final IntArrayList codeFlow;
public final int codeFlowMaxIndex;
private boolean finished;


public JumpConditionInfo(Opcodes opc, int opcIdx, int targetOffsetIdx) {
public JumpConditionInfo(Opcodes opc, int opcIdx, int targetOffset, int codeFlowMaxIndex, IntArrayList codeFlow) {
this.opc = opc;
this.opcIdx = opcIdx;
this.targetOffsetIdx = targetOffsetIdx;
this.targetOffset = targetOffset;
this.codeFlowMaxIndex = codeFlowMaxIndex;
this.codeFlow = codeFlow;
}


Expand All @@ -30,13 +39,13 @@ public int getOpcodeIndex() {
}


public int getTargetOffset() {
return targetOffsetIdx;
public IntArrayList getCodeFlow() {
return codeFlow;
}


public int getTargetIndex() {
return opcIdx + targetOffsetIdx;
return opcIdx + targetOffset;
}


Expand All @@ -49,4 +58,54 @@ public boolean isFinished() {
return finished;
}


/** Trace the code flow starting at a jump/condition instruction and bundle the result into a {@link JumpConditionInfo}.
 * The flow path is seeded with {@code ~idx} and then filled in by {@link CodeFlow#getFlowPaths(int, byte[], IntArrayList)}.
 * @param opc the opcode of the jump/condition instruction
 * @param idx the {@code instr} index of the jump/condition instruction
 * @param targetOffset the jump offset, relative to {@code idx}
 * @param instr the method bytecode array
 * @return the condition info including its code flow path and the maximum index found in that path
 */
public static JumpConditionInfo loadConditionFlow(Opcodes opc, int idx, int targetOffset, byte[] instr) {
    // build the flow path, beginning with the complemented index of the condition itself
    IntArrayList flowPath = new IntArrayList();
    flowPath.add(~idx);
    CodeFlow.getFlowPaths(idx, instr, flowPath);

    // potential end index (probably redundant once code flow is working)
    int flowMaxIdx = CodeFlow.maxIndex(flowPath);

    return new JumpConditionInfo(opc, idx, targetOffset, flowMaxIdx, flowPath);
}


/** Search {@code loops} (from last to first) for the condition whose opcode index is the GOTO
 * located 3 bytes before the destination of a backward jump.
 * @param curIdx the index of the current jump instruction
 * @param jumpRelative the jump offset relative to {@code curIdx}; only negative offsets are searched
 * @param loops the conditions to search
 * @return the index within {@code loops} of the last matching condition, or {@code -1} if none matches
 */
public static int findLoopStart(int curIdx, int jumpRelative, List<JumpConditionInfo> loops) {
    // Loops are generally compiled using a GOTO and an IF_* instruction
    // form 1: [..., GOTO <setup_if[0]>, instructions[], setup_if[], IF_* <instructions[0]>, ...]
    if(jumpRelative >= 0) {
        return -1;
    }

    // GOTO has a 2 byte operand so -3 is the GOTO instruction index right before the jump destination (which is the first instruction in a loop)
    final int gotoIdx = curIdx + jumpRelative - 3;
    int pos = loops.size();
    while(--pos >= 0) {
        if(loops.get(pos).getOpcodeIndex() == gotoIdx) {
            return pos;
        }
    }
    return -1;
}


/** Search {@code loops} (from last to first) for the condition whose target index equals the index
 * of the instruction immediately following the current forward jump.
 * @param curIdx the index of the current jump instruction
 * @param numOperands the number of operand bytes of the current instruction
 * @param jumpRelative the jump offset relative to {@code curIdx}; only positive offsets are searched
 * @param loops the conditions to search
 * @return the index within {@code loops} of the last matching condition, or {@code -1} if none matches
 */
public static int findLoopEnd(int curIdx, int numOperands, int jumpRelative, List<JumpConditionInfo> loops) {
    if(jumpRelative <= 0) {
        return -1;
    }

    // index of the instruction right after this jump (opcode byte + operand bytes)
    final int nextInstIdx = curIdx + numOperands + 1;
    for(int pos = loops.size() - 1; pos > -1; pos--) {
        JumpConditionInfo candidate = loops.get(pos);
        if(candidate.getTargetIndex() == nextInstIdx) {
            return pos;
        }
    }
    return -1;
}

}
75 changes: 75 additions & 0 deletions src/twg2/jbcm/ir/Switch.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package twg2.jbcm.ir;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

import twg2.jbcm.toSource.SwitchFlow;

/** The cases, default case, and end index information of one {@code switch} instruction,
 * plus tracking of which cases have been marked finished.
 * Instances are created via {@link #loadTableSwitch(int, byte[])} or {@link #loadLookupSwitch(int, byte[])}.
 * @author TeamworkGuy2
 * @since 2020-12-05
 */
public class Switch {
    /** the non-default cases of this switch */
    public List<SwitchCase> switchCases;
    /** the cases that have been passed to {@link #finish(SwitchCase)} */
    protected List<SwitchCase> finishedCases;
    /** the default case of this switch */
    public SwitchCase switchDefault;
    /** the value returned by the {@code SwitchFlow.load*Switch()} call which parsed this switch
     * (NOTE(review): presumably the size of the switch instruction - confirm against {@code SwitchFlow}) */
    public int switchInstSize;
    /** the common end index of the cases, or if there is none, the maximum code flow index of the cases */
    public int switchEndIdx;
    /** whether all the cases in this switch return/throw before the next case starts (see {@link SwitchFlow#isSwitchSimplePacked(List, SwitchCase, byte[])}) */
    public boolean isReturnPacked;


    /** Create a switch with no finished cases.
     * @param switchCases the non-default cases
     * @param switchDefault the default case
     */
    public Switch(List<SwitchCase> switchCases, SwitchCase switchDefault) {
        this.switchCases = switchCases;
        this.switchDefault = switchDefault;
        this.finishedCases = new ArrayList<>();
    }


    /** Mark a case as finished and record it in this switch's list of finished cases.
     * @param switchCase the case to finish
     */
    public void finish(SwitchCase switchCase) {
        switchCase.finish();
        finishedCases.add(switchCase);
    }


    /** @return true once the number of finished cases equals the number of cases plus the default case
     */
    public boolean isFinished() {
        return finishedCases.size() == switchCases.size() + 1; // + 1 for default case
    }


    /** Load a table switch via {@link SwitchFlow#loadTableSwitch(int, byte[], List, AtomicReference)} and determine its end index.
     * @param i the index passed through to {@code SwitchFlow.loadTableSwitch()}
     * @param instr the method bytecode array
     * @return the loaded switch
     */
    public static Switch loadTableSwitch(int i, byte[] instr) {
        var dstCases = new ArrayList<SwitchCase>();
        var dstSwitchDefault = new AtomicReference<SwitchCase>();
        int newI = SwitchFlow.loadTableSwitch(i, instr, dstCases, dstSwitchDefault);
        return create(dstCases, dstSwitchDefault.get(), newI, instr);
    }


    /** Load a lookup switch via {@link SwitchFlow#loadLookupSwitch(int, byte[], List, AtomicReference)} and determine its end index.
     * @param i the index passed through to {@code SwitchFlow.loadLookupSwitch()}
     * @param instr the method bytecode array
     * @return the loaded switch
     */
    public static Switch loadLookupSwitch(int i, byte[] instr) {
        var dstCases = new ArrayList<SwitchCase>();
        var dstSwitchDefault = new AtomicReference<SwitchCase>();
        int newI = SwitchFlow.loadLookupSwitch(i, instr, dstCases, dstSwitchDefault);
        return create(dstCases, dstSwitchDefault.get(), newI, instr);
    }


    /** Shared tail of both loaders: build the {@link Switch} and resolve its end index. */
    private static Switch create(List<SwitchCase> cases, SwitchCase defaultCase, int loadResult, byte[] instr) {
        var inst = new Switch(cases, defaultCase);
        int endIdx = SwitchFlow.commonSwitchEndIndex(inst.switchCases, inst.switchDefault, instr);
        if(endIdx == -1) {
            // no common end index was found, fall back to the max code flow index of the cases
            endIdx = SwitchFlow.maxSwitchCodeFlowIndex(inst.switchCases, inst.switchDefault, instr);
            if(endIdx > -1) {
                inst.isReturnPacked = true;
            }
        }
        inst.switchEndIdx = endIdx;
        inst.switchInstSize = loadResult;
        return inst;
    }
}
34 changes: 8 additions & 26 deletions src/twg2/jbcm/ir/SwitchCase.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import java.util.Comparator;

import twg2.collections.primitiveCollections.IntList;
import twg2.collections.primitiveCollections.IntArrayList;
import twg2.jbcm.CodeFlow;

/**
Expand All @@ -17,49 +17,31 @@ public class SwitchCase {
}
};

public static Comparator<SwitchCase> CASE_INDEX_COMPARATOR = new Comparator<SwitchCase>() {
public static Comparator<SwitchCase> CASE_TARGET_INDEX_COMPARATOR = new Comparator<SwitchCase>() {
@Override public int compare(SwitchCase o1, SwitchCase o2) {
return o1.caseTarget - o2.caseTarget;
}
};

public final int caseMatch;
public final int caseTarget;
public final int caseEndIdx;
public final int caseEndTarget;
public final boolean hasEndTarget;
private final IntList codeFlow;
private final IntArrayList codeFlow;
public final int codeFlowMaxIndex;
private boolean finished;

public SwitchCase(int caseMatch, int caseTarget, int caseEndIdx, IntList codeFlow) {
this.caseMatch = caseMatch;
this.caseTarget = caseTarget;
this.caseEndIdx = caseEndIdx;
this.caseEndTarget = 0;
this.hasEndTarget = false;
this.codeFlow = codeFlow;
}


public SwitchCase(int caseMatch, int caseTarget, int caseEndIdx, int caseEndTarget, IntList codeFlow) {
public SwitchCase(int caseMatch, int caseTarget, int codeFlowMaxIndex, IntArrayList codeFlow) {
this.caseMatch = caseMatch;
this.caseTarget = caseTarget;
this.caseEndIdx = caseEndIdx;
this.caseEndTarget = caseEndTarget;
this.hasEndTarget = true;
this.codeFlowMaxIndex = codeFlowMaxIndex;
this.codeFlow = codeFlow;
}


public boolean contains(int idx) {
return idx >= caseTarget && idx <= caseEndIdx;
}


/** The {@link CodeFlow} for this switch case statement, starting from the case target, tracing all non-circular paths within the method's code
* @return
*/
public IntList getCodeFlow() {
public IntArrayList getCodeFlow() {
return codeFlow;
}

Expand All @@ -76,6 +58,6 @@ public boolean isFinished() {

@Override
public String toString() {
return "case " + this.caseMatch + ": [" + this.caseTarget + ", " + this.caseEndIdx + (this.hasEndTarget ? "] -> " + this.caseEndTarget : "]");
return "case " + this.caseMatch + ": [" + this.caseTarget + ", " + this.codeFlow + "]";
}
}
Loading

0 comments on commit 3653372

Please sign in to comment.