From 5747b48d091156cbbf7e3e801808686197ba8e54 Mon Sep 17 00:00:00 2001 From: Stephan Kleber Date: Sun, 28 Jun 2020 23:13:26 +0200 Subject: [PATCH 1/2] init package nemere --- src/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/__init__.py diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 00000000..e69de29b From cbdcf9dffcc2d2f56b5e01c1dd4def8b6dd8ddee Mon Sep 17 00:00:00 2001 From: Stephan Kleber Date: Mon, 29 Jun 2020 00:43:59 +0200 Subject: [PATCH 2/2] refactor references to new top-level package nemere --- src/characterize_fieldtypes.py | 29 ++++++------ src/check_parse-pcap.py | 4 +- src/check_pcap-info.py | 2 +- src/inference/__init__.py | 2 - src/mixedlength_fieldtype_offsets.py | 20 ++++---- src/nemeftr_cluster-segments.py | 11 +++-- src/nemeftr_cluster-true-fields.py | 15 +++--- src/nemeftr_field-recognition.py | 22 +++++---- src/nemeftr_generate-fieldtype-templates.py | 24 +++++----- src/{ => nemere}/__init__.py | 0 src/{ => nemere}/alignment/__init__.py | 0 src/{ => nemere}/alignment/alignMessages.py | 22 ++++----- src/{ => nemere}/alignment/clusterMerging.py | 13 +++-- .../alignment/clusterSplitting.py | 4 +- .../alignment/hirschbergAlignSegments.py | 0 src/nemere/inference/__init__.py | 2 + src/{ => nemere}/inference/analyzers.py | 4 +- src/{ => nemere}/inference/fieldTypes.py | 15 +++--- .../inference/formatRefinement.py | 26 +++++----- src/{ => nemere}/inference/labeling.py | 2 +- src/{ => nemere}/inference/segmentHandler.py | 28 +++++------ src/{ => nemere}/inference/segments.py | 6 +-- src/{ => nemere}/inference/templates.py | 47 ++++++++++--------- src/{ => nemere}/inference/trackingBIDE.py | 3 +- src/{ => nemere}/utils/__init__.py | 0 src/{ => nemere}/utils/baseAlgorithms.py | 4 +- src/{ => nemere}/utils/evaluationHelpers.py | 28 +++++------ src/{ => nemere}/utils/loader.py | 4 +- src/{ => nemere}/validation/__init__.py | 0 .../validation/dissectorMatcher.py | 12 ++--- src/{ => nemere}/validation/messageParser.py | 2 +- .../validation/netzobFormatMatchScore.py | 4 +- src/{ => nemere}/validation/reportWriter.py | 4 +- .../validation/tsharkConnector.py | 0 src/{ => nemere}/visualization/__init__.py | 0 src/{ => nemere}/visualization/bcolors.py | 0 .../visualization/distancesPlotter.py | 17 +++---- .../visualization/multiPlotter.py | 11 +++-- src/{ => nemere}/visualization/plotter.py | 4 +- src/{ => nemere}/visualization/simplePrint.py | 8 ++-- .../visualization/singlePlotter.py | 10 ++-- src/nemesys.py | 4 +- src/nemesys_field-deviation-plot.py | 8 ++-- src/nemesys_fms.py | 16 +++---- src/nemesys_pca-refinement-iterate-params.py | 30 ++++++------ src/nemesys_pca-refinement.py | 30 ++++++------ src/nemetyl_align-segments.py | 29 +++++++----- src/nemezero_pca-refinement.py | 28 ++++++----- src/netzob_fms.py | 8 ++-- src/netzob_messagetypes.py | 16 +++---- src/prep_deduplicate-trace.py | 11 +++-- src/prep_filter-maxdiff-trace.py | 19 ++++---- src/refine-segmenter.py | 12 ++--- src/test_segment-refinements.py | 32 +++++-------- src/transform_cluster-statistics.py | 4 +- src/visualize_fieldtype_separation.py | 17 ++++--- 56 files changed, 336 insertions(+), 337 deletions(-) delete mode 100644 src/inference/__init__.py rename src/{ => nemere}/__init__.py (100%) rename src/{ => nemere}/alignment/__init__.py (100%) rename src/{ => nemere}/alignment/alignMessages.py (94%) rename src/{ => nemere}/alignment/clusterMerging.py (98%) rename src/{ => nemere}/alignment/clusterSplitting.py (99%) rename src/{ => nemere}/alignment/hirschbergAlignSegments.py (100%) create mode 100644 src/nemere/inference/__init__.py rename src/{ => nemere}/inference/analyzers.py (99%) rename src/{ => nemere}/inference/fieldTypes.py (98%) rename src/{ => nemere}/inference/formatRefinement.py (99%) rename src/{ => nemere}/inference/labeling.py (98%) rename src/{ => nemere}/inference/segmentHandler.py (96%) rename src/{ => nemere}/inference/segments.py (99%) rename src/{ => nemere}/inference/templates.py (98%) rename src/{ => nemere}/inference/trackingBIDE.py (99%) rename src/{ => nemere}/utils/__init__.py (100%) rename src/{ => nemere}/utils/baseAlgorithms.py (97%) rename src/{ => nemere}/utils/evaluationHelpers.py (97%) rename src/{ => nemere}/utils/loader.py (97%) rename src/{ => nemere}/validation/__init__.py (100%) rename src/{ => nemere}/validation/dissectorMatcher.py (99%) rename src/{ => nemere}/validation/messageParser.py (99%) rename src/{ => nemere}/validation/netzobFormatMatchScore.py (98%) rename src/{ => nemere}/validation/reportWriter.py (97%) rename src/{ => nemere}/validation/tsharkConnector.py (100%) rename src/{ => nemere}/visualization/__init__.py (100%) rename src/{ => nemere}/visualization/bcolors.py (100%) rename src/{ => nemere}/visualization/distancesPlotter.py (97%) rename src/{ => nemere}/visualization/multiPlotter.py (98%) rename src/{ => nemere}/visualization/plotter.py (96%) rename src/{ => nemere}/visualization/simplePrint.py (96%) rename src/{ => nemere}/visualization/singlePlotter.py (96%) diff --git a/src/characterize_fieldtypes.py b/src/characterize_fieldtypes.py index a126eedd..a1fd3476 100644 --- a/src/characterize_fieldtypes.py +++ b/src/characterize_fieldtypes.py @@ -4,8 +4,8 @@ Segments are generated using groundtruth from dissectors. TODO CAVEAT and cleanup: - Very old state without epsilon autoconfiguration and default epsilon values based on a code state - prior to the fixing of the "precomputed dissimilarity matrix bug". + Very old state without epsilon autoconfiguration and default epsilon values based on a code state + prior to the fixing of the "precomputed dissimilarity matrix bug". Takes a PCAP trace of a known protocol, dissects each message into their fields, and yields segments from each of them. These segments get analyzed by the given analysis method which is used as feature to determine their similarity. @@ -17,18 +17,17 @@ from os.path import isfile, basename from itertools import chain -from utils.loader import SpecimenLoader -from utils.baseAlgorithms import tril -from utils.evaluationHelpers import epspertrace, epsdefault, analyses, annotateFieldTypes, plotMultiSegmentLines, \ - labelForSegment -from inference.templates import TemplateGenerator, DistanceCalculator, DBSCANsegmentClusterer, HDBSCANsegmentClusterer, DelegatingDC -from inference.segments import TypedSegment -from inference.analyzers import * -from inference.segmentHandler import groupByLength, segments2types, segments2clusteredTypes, \ - filterSegments -from validation.dissectorMatcher import MessageComparator -from visualization.distancesPlotter import DistancesPlotter -from visualization.singlePlotter import SingleMessagePlotter +from nemere.utils.loader import SpecimenLoader +from nemere.utils.baseAlgorithms import tril +from nemere.utils.evaluationHelpers import epspertrace, epsdefault, analyses, annotateFieldTypes, \ + plotMultiSegmentLines, labelForSegment +from nemere.inference.templates import DistanceCalculator, DBSCANsegmentClusterer, HDBSCANsegmentClusterer +from nemere.inference.segments import TypedSegment +from nemere.inference.analyzers import * +from nemere.inference.segmentHandler import groupByLength, segments2types, segments2clusteredTypes, filterSegments +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.visualization.distancesPlotter import DistancesPlotter +from nemere.visualization.singlePlotter import SingleMessagePlotter debug = False @@ -126,7 +125,7 @@ def evaluateFieldTypeClustering(filteredSegments, eps, thresholdFunction, thresh # lenMasks[seg.length][idx] = True # from tabulate import tabulate - from inference.templates import Template + from nemere.inference.templates import Template # # field type change for labels # segFieldtypes = [seg.fieldtype if pseg.fieldtype != seg.fieldtype else '' for seg, pseg in # zip(filteredSegments, filteredSegments[:1] + filteredSegments)] diff --git a/src/check_parse-pcap.py b/src/check_parse-pcap.py index 319d316b..c1110437 100644 --- a/src/check_parse-pcap.py +++ b/src/check_parse-pcap.py @@ -10,8 +10,8 @@ from sys import exit import IPython -from validation.messageParser import ParsedMessage -from utils.loader import SpecimenLoader +from nemere.validation.messageParser import ParsedMessage +from nemere.utils.loader import SpecimenLoader if __name__ == '__main__': parser = ArgumentParser( diff --git a/src/check_pcap-info.py b/src/check_pcap-info.py index e10addb6..6e4ec2a3 100644 --- a/src/check_pcap-info.py +++ b/src/check_pcap-info.py @@ -6,7 +6,7 @@ from os.path import isfile, basename from tabulate import tabulate -from utils.loader import SpecimenLoader +from nemere.utils.loader import SpecimenLoader def countByteFrequency(): diff --git a/src/inference/__init__.py b/src/inference/__init__.py deleted file mode 100644 index 80ce82f9..00000000 --- a/src/inference/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - -__all__ = ['analyzers.py', 'segments.py'] \ No newline at end of file diff --git a/src/mixedlength_fieldtype_offsets.py b/src/mixedlength_fieldtype_offsets.py index 36977b2d..df31c2c7 100644 --- a/src/mixedlength_fieldtype_offsets.py +++ b/src/mixedlength_fieldtype_offsets.py @@ -4,21 +4,19 @@ """ import argparse, IPython -from os.path import isfile, basename, splitext +from os.path import isfile from itertools import chain from tabulate import tabulate from math import ceil, floor -from inference.templates import DistanceCalculator -from inference.segments import TypedSegment -from inference.analyzers import * -from inference.segmentHandler import groupByLength, segments2types, \ - filterSegments -from utils.evaluationHelpers import annotateFieldTypes -from validation.dissectorMatcher import MessageComparator -from utils.loader import SpecimenLoader -from visualization.multiPlotter import MultiMessagePlotter -from visualization.distancesPlotter import DistancesPlotter +from nemere.inference.templates import DistanceCalculator +from nemere.inference.segments import TypedSegment +from nemere.inference.analyzers import * +from nemere.inference.segmentHandler import segments2types, filterSegments +from nemere.utils.evaluationHelpers import annotateFieldTypes +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.utils.loader import SpecimenLoader +from nemere.visualization.multiPlotter import MultiMessagePlotter debug = False diff --git a/src/nemeftr_cluster-segments.py b/src/nemeftr_cluster-segments.py index a838e94e..6a464b83 100644 --- a/src/nemeftr_cluster-segments.py +++ b/src/nemeftr_cluster-segments.py @@ -12,12 +12,15 @@ from os.path import isfile, basename, join, splitext, exists from os import makedirs import matplotlib.pyplot as plt +import numpy -from inference.templates import DBSCANsegmentClusterer, FieldTypeTemplate, TypedTemplate, FieldTypeContext, ClusterAutoconfException -from inference.segmentHandler import baseRefinements, originalRefinements, \ +from nemere.inference.templates import DBSCANsegmentClusterer, FieldTypeTemplate, TypedTemplate, Template,\ + ClusterAutoconfException +from nemere.inference.segments import MessageSegment, TypedSegment +from nemere.inference.segmentHandler import baseRefinements, originalRefinements, pcaMocoRefinements, \ pcaPcaRefinements, zeroBaseRefinements, isExtendedCharSeq, zeroPCARefinements -from visualization.distancesPlotter import DistancesPlotter -from utils.evaluationHelpers import * +from nemere.visualization.distancesPlotter import DistancesPlotter +from nemere.utils.evaluationHelpers import * debug = False diff --git a/src/nemeftr_cluster-true-fields.py b/src/nemeftr_cluster-true-fields.py index 799788ea..183bc209 100644 --- a/src/nemeftr_cluster-true-fields.py +++ b/src/nemeftr_cluster-true-fields.py @@ -20,15 +20,14 @@ from matplotlib import pyplot as plt import numpy -from utils.evaluationHelpers import analyses, annotateFieldTypes, labelForSegment, \ +from nemere.utils.evaluationHelpers import analyses, annotateFieldTypes, labelForSegment, \ plotMultiSegmentLines, writeCollectiveClusteringStaticstics -from inference.templates import DBSCANsegmentClusterer, MemmapDC, DelegatingDC, ClusterAutoconfException - -from inference.segmentHandler import segments2types, segments2clusteredTypes, isExtendedCharSeq -from validation.dissectorMatcher import MessageComparator -from utils.loader import SpecimenLoader -from visualization.distancesPlotter import DistancesPlotter -from visualization.singlePlotter import SingleMessagePlotter +from nemere.inference.templates import DBSCANsegmentClusterer, MemmapDC, DelegatingDC, ClusterAutoconfException +from nemere.inference.segmentHandler import segments2types, segments2clusteredTypes, isExtendedCharSeq +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.utils.loader import SpecimenLoader +from nemere.visualization.distancesPlotter import DistancesPlotter +from nemere.visualization.singlePlotter import SingleMessagePlotter debug = False diff --git a/src/nemeftr_field-recognition.py b/src/nemeftr_field-recognition.py index 7a882068..2a93fc08 100644 --- a/src/nemeftr_field-recognition.py +++ b/src/nemeftr_field-recognition.py @@ -6,22 +6,24 @@ Moreover, contains an (unused) method to evaluate subsequence frequency analysis as means to identify common field types. """ -import argparse +import argparse, numpy from collections import OrderedDict - from os.path import isfile, basename from tabulate import tabulate import IPython -from inference.segmentHandler import symbolsFromSegments -from inference.trackingBIDE import BIDEracker +from nemere.inference.segmentHandler import symbolsFromSegments +from nemere.inference.segments import TypedSegment, MessageAnalyzer +from nemere.inference.trackingBIDE import BIDEracker +from nemere.utils.loader import SpecimenLoader +from nemere.validation import reportWriter +from nemere.validation.dissectorMatcher import DissectorMatcher, MessageComparator +from nemere.inference.fieldTypes import FieldTypeMemento, FieldTypeRecognizer, FieldTypeQuery, RecognizedField +from nemere.visualization.simplePrint import printFieldContext, printMarkedBytesInMessage +from nemere.visualization.multiPlotter import MultiMessagePlotter +from nemere.utils.evaluationHelpers import * + from nemesys_fms import mapQualities2Messages -from validation import reportWriter -from validation.dissectorMatcher import DissectorMatcher -from inference.fieldTypes import FieldTypeMemento, FieldTypeRecognizer, FieldTypeQuery, RecognizedField -from visualization.simplePrint import printFieldContext, printMarkedBytesInMessage -from visualization.multiPlotter import MultiMessagePlotter -from utils.evaluationHelpers import * # fix the analysis method to VALUE analysisTitle = 'value' diff --git a/src/nemeftr_generate-fieldtype-templates.py b/src/nemeftr_generate-fieldtype-templates.py index b985c1a5..429a042f 100644 --- a/src/nemeftr_generate-fieldtype-templates.py +++ b/src/nemeftr_generate-fieldtype-templates.py @@ -11,19 +11,17 @@ """ import argparse, IPython from os.path import isfile, basename -from itertools import chain - -from inference.templates import DBSCANsegmentClusterer, DelegatingDC, DistanceCalculator, FieldTypeTemplate -from inference.fieldTypes import FieldTypeMemento -from inference.segments import TypedSegment, HelperSegment -from inference.analyzers import * -from inference.segmentHandler import groupByLength, segments2types, \ - filterSegments -from validation.dissectorMatcher import MessageComparator -from utils.loader import SpecimenLoader -from visualization.distancesPlotter import DistancesPlotter -from visualization.multiPlotter import MultiMessagePlotter, PlotGroups -from utils.evaluationHelpers import * + +from nemere.inference.templates import DBSCANsegmentClusterer, DelegatingDC, FieldTypeTemplate +from nemere.inference.fieldTypes import FieldTypeMemento +from nemere.inference.segments import TypedSegment +from nemere.inference.analyzers import * +from nemere.inference.segmentHandler import segments2types +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.utils.loader import SpecimenLoader +from nemere.visualization.distancesPlotter import DistancesPlotter +from nemere.visualization.multiPlotter import MultiMessagePlotter, PlotGroups +from nemere.utils.evaluationHelpers import * debug = False diff --git a/src/__init__.py b/src/nemere/__init__.py similarity index 100% rename from src/__init__.py rename to src/nemere/__init__.py diff --git a/src/alignment/__init__.py b/src/nemere/alignment/__init__.py similarity index 100% rename from src/alignment/__init__.py rename to src/nemere/alignment/__init__.py diff --git a/src/alignment/alignMessages.py b/src/nemere/alignment/alignMessages.py similarity index 94% rename from src/alignment/alignMessages.py rename to src/nemere/alignment/alignMessages.py index ae56931b..e238ed1e 100644 --- a/src/alignment/alignMessages.py +++ b/src/nemere/alignment/alignMessages.py @@ -6,10 +6,10 @@ from sklearn.cluster import DBSCAN from scipy.special import comb -from alignment.hirschbergAlignSegments import HirschbergOnSegmentSimilarity, Alignment -from inference.segmentHandler import matrixFromTpairs -from inference.segments import MessageSegment -from inference.templates import DistanceCalculator, MemmapDC +from nemere.alignment.hirschbergAlignSegments import HirschbergOnSegmentSimilarity +from nemere.inference.segmentHandler import matrixFromTpairs +from nemere.inference.segments import MessageSegment +from nemere.inference.templates import DistanceCalculator, MemmapDC class SegmentedMessages(object): @@ -71,8 +71,8 @@ def _calcSimilarityMatrix(self): """ Calculate a similarity matrix of messages from nwscores yielded by HirschbergOnSegmentSimilarity. - >>> from utils.baseAlgorithms import generateTestSegments - >>> from inference.templates import DistanceCalculator + >>> from nemere.utils.baseAlgorithms import generateTestSegments + >>> from nemere.inference.templates import DistanceCalculator >>> segments = generateTestSegments() >>> DistanceCalculator.debug = False >>> dc = DistanceCalculator(segments) @@ -104,8 +104,8 @@ def _calcDistanceMatrix(self, similarityMatrix: numpy.ndarray): """ For clustering, convert the nwscores-based similarity matrix to a distance measure. - >>> from utils.baseAlgorithms import generateTestSegments - >>> from inference.templates import DistanceCalculator + >>> from nemere.utils.baseAlgorithms import generateTestSegments + >>> from nemere.inference.templates import DistanceCalculator >>> segments = generateTestSegments() >>> DistanceCalculator.debug = False >>> dc = DistanceCalculator(segments) @@ -213,8 +213,8 @@ def alignMessageType(self, msgcluster: List[Tuple[MessageSegment]]): Messages segments of one cluster aligned to the medoid ("segments that is most similar too all segments") of the cluster. - >>> from utils.baseAlgorithms import generateTestSegments - >>> from inference.templates import DistanceCalculator + >>> from nemere.utils.baseAlgorithms import generateTestSegments + >>> from nemere.inference.templates import DistanceCalculator >>> segments = generateTestSegments() >>> DistanceCalculator.debug = False >>> dc = DistanceCalculator(segments) @@ -288,7 +288,7 @@ def autoconfigureDBSCAN(self): :return: minpts, epsilon """ - from utils.baseAlgorithms import autoconfigureDBSCAN + from nemere.utils.baseAlgorithms import autoconfigureDBSCAN # can we omit k = 0 ? # No - recall and even more so precision deteriorates for dns and dhcp (1000s) epsilon, min_samples, k = autoconfigureDBSCAN(self.neighbors()) diff --git a/src/alignment/clusterMerging.py b/src/nemere/alignment/clusterMerging.py similarity index 98% rename from src/alignment/clusterMerging.py rename to src/nemere/alignment/clusterMerging.py index 8861e234..e6c13fca 100644 --- a/src/alignment/clusterMerging.py +++ b/src/nemere/alignment/clusterMerging.py @@ -5,9 +5,9 @@ from networkx import Graph from networkx.algorithms.components.connected import connected_components -from inference.templates import DistanceCalculator, Template -from alignment.hirschbergAlignSegments import HirschbergOnSegmentSimilarity, NWonSegmentSimilarity -from inference.analyzers import * +from nemere.inference.templates import DistanceCalculator, Template +from nemere.alignment.hirschbergAlignSegments import HirschbergOnSegmentSimilarity +from nemere.inference.analyzers import * @@ -323,7 +323,6 @@ def generateMatchingConditionsAlt2(self, alignedFieldClasses): @staticmethod def selectMatchingClusters(alignedFieldClasses, matchingConditions): - from math import ceil def lenAndTrue(boolist, length=2, truths=0): return len(boolist) <= length and len([a for a in boolist if a]) > truths @@ -403,8 +402,8 @@ def mergeClusters(self, messageClusters, clusterStats, alignedFieldClasses, matchingClusters, matchingConditions): import IPython from tabulate import tabulate - from utils.evaluationHelpers import printClusterMergeConditions - from inference.templates import Template + from nemere.utils.evaluationHelpers import printClusterMergeConditions + from nemere.inference.templates import Template remDue2gaps = [ clunuAB for clunuAB in matchingClusters @@ -592,7 +591,7 @@ def __init__(self, alignedClusters: Dict[int, List], dc: DistanceCalculator): def calcClusterDistances(self, mmg=(0, -1, 5)): - from inference.segmentHandler import matrixFromTpairs + from nemere.inference.segmentHandler import matrixFromTpairs fclassHirsch, statDynFields, statDynValues = self.generateHirsch(mmg) diff --git a/src/alignment/clusterSplitting.py b/src/nemere/alignment/clusterSplitting.py similarity index 99% rename from src/alignment/clusterSplitting.py rename to src/nemere/alignment/clusterSplitting.py index c08e21d7..b62bf543 100644 --- a/src/alignment/clusterSplitting.py +++ b/src/nemere/alignment/clusterSplitting.py @@ -8,8 +8,8 @@ from collections import Counter from tabulate import tabulate -from inference.segments import MessageSegment -from alignment.alignMessages import SegmentedMessages +from nemere.inference.segments import MessageSegment +from nemere.alignment.alignMessages import SegmentedMessages debug = True diff --git a/src/alignment/hirschbergAlignSegments.py b/src/nemere/alignment/hirschbergAlignSegments.py similarity index 100% rename from src/alignment/hirschbergAlignSegments.py rename to src/nemere/alignment/hirschbergAlignSegments.py diff --git a/src/nemere/inference/__init__.py b/src/nemere/inference/__init__.py new file mode 100644 index 00000000..15dc8b4c --- /dev/null +++ b/src/nemere/inference/__init__.py @@ -0,0 +1,2 @@ + +__all__ = ['analyzers', 'segments'] \ No newline at end of file diff --git a/src/inference/analyzers.py b/src/nemere/inference/analyzers.py similarity index 99% rename from src/inference/analyzers.py rename to src/nemere/inference/analyzers.py index aa2c08cf..92ea9766 100644 --- a/src/inference/analyzers.py +++ b/src/nemere/inference/analyzers.py @@ -16,7 +16,7 @@ # The analyzer implementations heavily depend on the MessageAnalyzer base class # that itself is deeply intertwined with the MessageSegment class: -from inference.segments import MessageAnalyzer, MessageSegment, SegmentAnalyzer +from nemere.inference.segments import MessageAnalyzer, MessageSegment, SegmentAnalyzer class NothingToCompareError(ValueError): @@ -428,7 +428,7 @@ def analyze(self): """ if not self._n: raise ParametersNotSet('Analysis parameter missing: N-gram size ("n").') - from utils.baseAlgorithms import ngrams + from nemere.utils.baseAlgorithms import ngrams super().analyze() self._ngramMean = [float(numpy.mean(bcn)) for bcn in ngrams(self._values, self._n)] diff --git a/src/inference/fieldTypes.py b/src/nemere/inference/fieldTypes.py similarity index 98% rename from src/inference/fieldTypes.py rename to src/nemere/inference/fieldTypes.py index a926e346..0410df75 100644 --- a/src/inference/fieldTypes.py +++ b/src/nemere/inference/fieldTypes.py @@ -3,10 +3,11 @@ import numpy import scipy.spatial -from inference.analyzers import Value -from inference.segments import MessageAnalyzer, MessageSegment, TypedSegment from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage +from nemere.inference.analyzers import Value +from nemere.inference.segments import MessageAnalyzer, MessageSegment, TypedSegment + @@ -84,8 +85,8 @@ def cov(self) -> numpy.ndarray: """ There is some rounding error so the stdev is not entierely identical to the diagonal of the covariance matrix. - >>> from inference.templates import FieldTypeTemplate - >>> from inference.segments import MessageSegment + >>> from nemere.inference.templates import FieldTypeTemplate + >>> from nemere.inference.segments import MessageSegment >>> bs = [MessageSegment(None, 0, 1)] >>> ftt = FieldTypeTemplate(bs) >>> numpy.round(ftt.stdev, 8) == numpy.round(ftt.cov.diagonal(), 8) @@ -394,7 +395,7 @@ def charsInMessage(self) -> List[RecognizedField]: :return: list of recognized char sequences with the constant confidence of 0.2 """ - from inference.segmentHandler import isExtendedCharSeq + from nemere.inference.segmentHandler import isExtendedCharSeq confidence = 0.2 offset = 0 @@ -584,7 +585,7 @@ def isOverlapping(aStart: int, aEnd: int, bStart: int, bEnd: int): return aStart <= bStart < aEnd or bStart < aEnd <= bEnd - from inference.segments import TypedSegment + from nemere.inference.segments import TypedSegment def matchStatistics(self, segmentedMessage: List[TypedSegment]): """ Generate lists for statistics of true and false positives and false negatives. @@ -595,7 +596,7 @@ def matchStatistics(self, segmentedMessage: List[TypedSegment]): """ assert segmentedMessage[0].message == self.message - from inference.segmentHandler import segments2types + from nemere.inference.segmentHandler import segments2types typedSegments = segments2types(segmentedMessage) nonConflicting = self.resolveConflicting() diff --git a/src/inference/formatRefinement.py b/src/nemere/inference/formatRefinement.py similarity index 99% rename from src/inference/formatRefinement.py rename to src/nemere/inference/formatRefinement.py index 9502c9ce..dfafcf23 100644 --- a/src/inference/formatRefinement.py +++ b/src/nemere/inference/formatRefinement.py @@ -8,13 +8,13 @@ from kneed import KneeLocator from tabulate import tabulate import IPython + from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -from inference.segments import MessageSegment -from inference.segmentHandler import isExtendedCharSeq -from inference.templates import FieldTypeContext, DBSCANsegmentClusterer, DistanceCalculator, Template, \ - ClusterAutoconfException -from validation.dissectorMatcher import MessageComparator +from nemere.inference.segments import MessageSegment +from nemere.inference.segmentHandler import isExtendedCharSeq +from nemere.inference.templates import FieldTypeContext, DBSCANsegmentClusterer, DistanceCalculator, Template, ClusterAutoconfException +from nemere.validation.dissectorMatcher import MessageComparator def isPrintableChar(char: int): @@ -93,9 +93,9 @@ class MergeConsecutiveChars(Merger): Merge consecutive segments completely consisting of printable-char values into a text field. Printable chars are defined as: \t, \n, \r, >= 0x20 and <= 0x7e - >>> from inference.segmentHandler import bcDeltaGaussMessageSegmentation - >>> from utils.loader import SpecimenLoader - >>> import inference.formatRefinement as refine + >>> from nemere.inference.segmentHandler import bcDeltaGaussMessageSegmentation + >>> from nemere.utils.loader import SpecimenLoader + >>> import nemere.inference.formatRefinement as refine >>> from tabulate import tabulate >>> sl = SpecimenLoader('../input/dns_ictf2010_deduped-100.pcap', layer=0, relativeToIP=True) >>> segmentsPerMsg = bcDeltaGaussMessageSegmentation(sl) @@ -265,9 +265,9 @@ def countPairFrequencies(allMsgsSegs: List[List[MessageSegment]]): Needs only to be called once before all segments of one inference pass can be refined. A different inference required to run this method again before refinement by this class. - >>> from inference.segmentHandler import bcDeltaGaussMessageSegmentation - >>> from utils.loader import SpecimenLoader - >>> import inference.formatRefinement as refine + >>> from nemere.inference.segmentHandler import bcDeltaGaussMessageSegmentation + >>> from nemere.utils.loader import SpecimenLoader + >>> import nemere.inference.formatRefinement as refine >>> from tabulate import tabulate >>> sl = SpecimenLoader('../input/random-100-continuous.pcap', layer=0, relativeToIP=True) >>> segmentsPerMsg = bcDeltaGaussMessageSegmentation(sl) @@ -1843,7 +1843,7 @@ def removeSuperfluousBounds(newBounds: Dict[AbstractMessage, Dict[MessageSegment :param newBounds: Dict of all input messages, with a Dict mapping each segment to a list of its bounds. :return: reference to the newBounds Dict """ - from visualization.simplePrint import markSegmentInMessage + from nemere.visualization.simplePrint import markSegmentInMessage for message, segsbounds in newBounds.items(): # Below, by list(chain(...)) create a copy to iterate, so we can delete stuff in the original bound lists. @@ -2286,7 +2286,7 @@ def split(self): :return: List of char and non-char segments for the message """ - from inference.fieldTypes import FieldTypeRecognizer + from nemere.inference.fieldTypes import FieldTypeRecognizer ftrecog = FieldTypeRecognizer(self.segments[0].analyzer) # RecognizedFields or type char using isExtendedCharSeq diff --git a/src/inference/labeling.py b/src/nemere/inference/labeling.py similarity index 98% rename from src/inference/labeling.py rename to src/nemere/inference/labeling.py index 3bfaf981..0096e499 100644 --- a/src/inference/labeling.py +++ b/src/nemere/inference/labeling.py @@ -2,7 +2,7 @@ from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -import inference.templates as TG +import nemere.inference.templates as TG class SegmentLabel(object): diff --git a/src/inference/segmentHandler.py b/src/nemere/inference/segmentHandler.py similarity index 96% rename from src/inference/segmentHandler.py rename to src/nemere/inference/segmentHandler.py index 11b09824..75f7a110 100644 --- a/src/inference/segmentHandler.py +++ b/src/nemere/inference/segmentHandler.py @@ -8,9 +8,9 @@ from netzob.Model.Vocabulary.Symbol import Symbol, Field -from inference.segments import MessageSegment, HelperSegment, TypedSegment, AbstractSegment -from inference.analyzers import MessageAnalyzer -from inference.templates import AbstractClusterer, TypedTemplate, DistanceCalculator, DelegatingDC +from nemere.inference.segments import MessageSegment, HelperSegment, TypedSegment, AbstractSegment +from nemere.inference.analyzers import MessageAnalyzer +from nemere.inference.templates import AbstractClusterer, TypedTemplate, DistanceCalculator, DelegatingDC def segmentMeans(segmentsPerMsg: List[List[MessageSegment]]): @@ -175,7 +175,7 @@ def bcDeltaGaussMessageSegmentation(specimens, sigma=0.6) -> List[List[MessageSe """ Segment message by determining inflection points of gauss-filtered bit congruence deltas. - >>> from utils.loader import SpecimenLoader + >>> from nemere.utils.loader import SpecimenLoader >>> sl = SpecimenLoader('../input/random-100-continuous.pcap', layer=0, relativeToIP=True) >>> segmentsPerMsg = bcDeltaGaussMessageSegmentation(sl) Segmentation by inflections of sigma-0.6-gauss-filtered bit-variance. @@ -185,7 +185,7 @@ def bcDeltaGaussMessageSegmentation(specimens, sigma=0.6) -> List[List[MessageSe :return: Segmentation of the specimens in the pool. """ - from inference.analyzers import BitCongruenceDeltaGauss + from nemere.inference.analyzers import BitCongruenceDeltaGauss print('Segmentation by inflections of sigma-{:.1f}-gauss-filtered bit-variance.'.format( sigma @@ -222,7 +222,7 @@ def pcaMocoRefinements(segmentsPerMsg: List[List[MessageSegment]], dc: DistanceC :return: refined segments in list per message """ from itertools import chain - from inference.formatRefinement import RelocatePCA, CropDistinct + from nemere.inference.formatRefinement import RelocatePCA, CropDistinct print("Refine segmentation (+ moco refinements)...") @@ -258,7 +258,7 @@ def pcaRefinements(segmentsPerMsg: Sequence[Sequence[MessageSegment]], **kwargs) :return: refined segments in list per message """ from itertools import chain - from inference.formatRefinement import RelocatePCA + from nemere.inference.formatRefinement import RelocatePCA print("Refine segmentation (PCA refinements)...") @@ -281,7 +281,7 @@ def pcaPcaRefinements(segmentsPerMsg: Sequence[Sequence[MessageSegment]], **kwar :return: refined segments in list per message """ from itertools import chain - from inference.formatRefinement import RelocatePCA + from nemere.inference.formatRefinement import RelocatePCA print("Refine segmentation (PCA refinements)...") @@ -305,7 +305,7 @@ def baseRefinements(segmentsPerMsg: Sequence[Sequence[MessageSegment]]) -> List[ :param segmentsPerMsg: a list of one list of segments per message. :return: refined segments in list per message """ - import inference.formatRefinement as refine + import nemere.inference.formatRefinement as refine print("Refine segmentation (base refinements)...") @@ -328,7 +328,7 @@ def baseRefinements(segmentsPerMsg: Sequence[Sequence[MessageSegment]]) -> List[ def zeroBaseRefinements(segmentsPerMsg: Sequence[Sequence[MessageSegment]]) -> List[List[MessageSegment]]: - import inference.formatRefinement as refine + import nemere.inference.formatRefinement as refine print("Refine segmentation (zero-slices refinements)...") @@ -348,7 +348,7 @@ def nemetylRefinements(segmentsPerMsg: Sequence[Sequence[MessageSegment]]) -> Li :param segmentsPerMsg: a list of one list of segments per message. :return: refined segments in list per message """ - import inference.formatRefinement as refine + import nemere.inference.formatRefinement as refine print("Refine segmentation (nemetyl refinements)...") @@ -383,7 +383,7 @@ def charRefinements(segmentsPerMsg: Sequence[Sequence[MessageSegment]]) -> List[ :param segmentsPerMsg: a list of one list of segments per message. :return: refined segments in list per message """ - import inference.formatRefinement as refine + import nemere.inference.formatRefinement as refine print("Refine segmentation (char refinements)...") @@ -411,7 +411,7 @@ def originalRefinements(segmentsPerMsg: Sequence[Sequence[MessageSegment]]) -> L :param segmentsPerMsg: a list of one list of segments per message. :return: refined segments in list per message """ - import inference.formatRefinement as refine + import nemere.inference.formatRefinement as refine print("Refine segmentation (WOOT18 refinements)...") @@ -655,7 +655,7 @@ def locateNonPrintable(bstring: bytes) -> List[int]: :param bstring: a string of bytes :return: position of bytes not in \t, \n, \r or between >= 0x20 and <= 0x7e """ - from inference.formatRefinement import isPrintableChar + from nemere.inference.formatRefinement import isPrintableChar npr = list() for idx, bchar in enumerate(bstring): diff --git a/src/inference/segments.py b/src/nemere/inference/segments.py similarity index 99% rename from src/inference/segments.py rename to src/nemere/inference/segments.py index 6f0d02b9..2382ad91 100644 --- a/src/inference/segments.py +++ b/src/nemere/inference/segments.py @@ -134,7 +134,7 @@ def ngrams(self, n: int): """ :return: the ngrams of the message in order """ - from utils.baseAlgorithms import ngrams + from nemere.utils.baseAlgorithms import ngrams return ngrams(self._message.data, n) @@ -467,7 +467,7 @@ def correlate(self, :param method: The method to correlate with (see class constants prefixed with CORR_ for available options) :return: """ - from utils.baseAlgorithms import ngrams + from nemere.utils.baseAlgorithms import ngrams import scipy.spatial.distance selfCorrMsgs = list() @@ -555,7 +555,7 @@ def fieldCandidate(self): :return: A segment being the best field candidate according to this correlation. """ - from inference.analyzers import NoneAnalysis + from nemere.inference.analyzers import NoneAnalysis # TODO There could be more than one close match... # and multiple segments/field candidates of the same type, therefore matching the same feature diff --git a/src/inference/templates.py b/src/nemere/inference/templates.py similarity index 98% rename from src/inference/templates.py rename to src/nemere/inference/templates.py index 27949065..1d583211 100644 --- a/src/inference/templates.py +++ b/src/nemere/inference/templates.py @@ -6,10 +6,11 @@ from kneed import KneeLocator import numpy, scipy.spatial, itertools -from inference.fieldTypes import FieldTypeMemento from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -from inference.analyzers import MessageAnalyzer, Value -from inference.segments import MessageSegment, AbstractSegment, CorrelatedSegment, HelperSegment, TypedSegment + +from nemere.inference.fieldTypes import FieldTypeMemento +from nemere.inference.analyzers import MessageAnalyzer, Value +from nemere.inference.segments import MessageSegment, AbstractSegment, CorrelatedSegment, HelperSegment, TypedSegment debug = False @@ -46,8 +47,8 @@ def __init__(self, segments: Iterable[AbstractSegment], method='canberra', """ Determine the distance between the given segments. - >>> from inference.analyzers import Value >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage + >>> from nemere.inference.analyzers import Value >>> >>> bytedata = bytes([1,2,3,4]) >>> message = RawMessage(bytedata) @@ -107,7 +108,7 @@ def distanceMatrix(self) -> numpy.ndarray: The order of the matrix elements in each row and column is the same as in self.segments. >>> from tabulate import tabulate - >>> from utils.baseAlgorithms import generateTestSegments + >>> from nemere.utils.baseAlgorithms import generateTestSegments >>> segments = generateTestSegments() >>> DistanceCalculator.debug = False >>> dc = DistanceCalculator(segments) @@ -368,7 +369,7 @@ def distancesSubset(self, As: Sequence[AbstractSegment], Bs: Sequence[AbstractSe """ Retrieve a matrix of pairwise distances for two lists of segments. - >>> from utils.baseAlgorithms import generateTestSegments + >>> from nemere.utils.baseAlgorithms import generateTestSegments >>> segments = generateTestSegments() >>> DistanceCalculator.debug = False >>> dc = DistanceCalculator(segments) @@ -415,7 +416,7 @@ def groupByLength(self) -> Dict[int, List[Tuple[int, int, Tuple[float]]]]: Used in constructor. >>> from pprint import pprint - >>> from utils.baseAlgorithms import generateTestSegments + >>> from nemere.utils.baseAlgorithms import generateTestSegments >>> segments = generateTestSegments() >>> DistanceCalculator.debug = False >>> dc = DistanceCalculator(segments) @@ -595,7 +596,7 @@ def _getDistanceMatrix(distances: Iterable[Tuple[int, int, float]], segmentCount Used in constructor. >>> from tabulate import tabulate - >>> from inference.templates import DistanceCalculator + >>> from nemere.inference.templates import DistanceCalculator >>> testdata = [(3, 3, 0.0), ... (0, 3, 0.80835755871765202), ... (5, 3, 1.0), @@ -629,7 +630,7 @@ def _getDistanceMatrix(distances: Iterable[Tuple[int, int, float]], segmentCount :return: The distance matrix for the given similarities. -1 for each undefined element, 0 in the diagonal, even if not given in the input. """ - from inference.segmentHandler import matrixFromTpairs + from nemere.inference.segmentHandler import matrixFromTpairs simtrx = matrixFromTpairs(distances, range(segmentCount), incomparable=-1) # TODO handle incomparable values (resolve and replace the negative value) return simtrx @@ -745,7 +746,7 @@ def _embdedAndCalcDistances(self) -> \ complete distance list of all combinations of the into segment list regardless of their length. >>> from tabulate import tabulate - >>> from utils.baseAlgorithms import generateTestSegments + >>> from nemere.utils.baseAlgorithms import generateTestSegments >>> segments = generateTestSegments() >>> DistanceCalculator.debug = False >>> dc = DistanceCalculator(segments) @@ -1024,7 +1025,7 @@ def loadCached(analysisTitle: str, tokenizer: str, pcapfilename: str) -> Tuple[ :return: A cached DistanceCalculator or None """ import pickle - from validation.dissectorMatcher import MessageComparator + from nemere.validation.dissectorMatcher import MessageComparator dccacheexists, dccachefn = DistanceCalculator._checkCacheFile(analysisTitle, tokenizer, pcapfilename) @@ -1081,7 +1082,7 @@ def _manipulateChars(self, charMatchGain = .5): :return: """ from itertools import combinations - from inference.segmentHandler import filterChars + from nemere.inference.segmentHandler import filterChars assert all((isinstance(seg, AbstractSegment) for seg in self.segments)) charsequences = filterChars(self.segments) @@ -1199,8 +1200,8 @@ def distancesToMixedLength(self, dc: DistanceCalculator=None): >>> from tabulate import tabulate >>> from scipy.spatial.distance import cdist - >>> from utils.baseAlgorithms import generateTestSegments - >>> from inference.templates import DistanceCalculator, Template + >>> from nemere.utils.baseAlgorithms import generateTestSegments + >>> from nemere.inference.templates import DistanceCalculator, Template >>> listOfSegments = generateTestSegments() >>> DistanceCalculator.debug = False >>> dc = DistanceCalculator(listOfSegments) @@ -1307,7 +1308,7 @@ def toColor(self): """ oid = hash(self) # return '{:02x}'.format(oid % 0xffff) - import visualization.bcolors as bcolors + import nemere.visualization.bcolors as bcolors # Template return bcolors.colorizeStr('{:02x}'.format(oid % 0xffff), oid % 0xff) @@ -1329,7 +1330,7 @@ class TypedTemplate(Template): def __init__(self, values: Union[Tuple[Union[float, int]], MessageSegment], baseSegments: Iterable[AbstractSegment], method='canberra'): - from inference.segments import TypedSegment + from nemere.inference.segments import TypedSegment super().__init__(values, baseSegments, method) ftypes = {bs.fieldtype for bs in baseSegments if isinstance(bs, TypedSegment)} @@ -2082,7 +2083,7 @@ def _autoconfigureMPC(self): :return: min_samples, epsilon """ - from utils.baseAlgorithms import autoconfigureDBSCAN + from nemere.utils.baseAlgorithms import autoconfigureDBSCAN neighbors = [self.distanceCalculator.neigbors(seg) for seg in self.distanceCalculator.segments] epsilon, min_samples, k = autoconfigureDBSCAN(neighbors) print("eps {:0.3f} autoconfigured (MPC) from k {}".format(epsilon, k)) @@ -2094,7 +2095,7 @@ def _maximumPositiveCurvature(self): Use implementation of utils.baseAlgorithms to determine the maximum positive curvature :return: k, min_samples """ - from utils.baseAlgorithms import autoconfigureDBSCAN + from nemere.utils.baseAlgorithms import autoconfigureDBSCAN e, min_samples, k = autoconfigureDBSCAN( [self.distanceCalculator.neigbors(seg) for seg in self.distanceCalculator.segments]) return k, min_samples @@ -2146,7 +2147,7 @@ def _autoconfigureECDFKneedle(self): from math import log, ceil from scipy.ndimage.filters import gaussian_filter1d from kneed import KneeLocator - from utils.baseAlgorithms import ecdf + from nemere.utils.baseAlgorithms import ecdf # only unique! min_samples = round(log(len(self.distanceCalculator.segments))) @@ -2207,7 +2208,7 @@ def autoconfigureEvaluation(self, filename: str, markeps: float = False): from scipy.ndimage.filters import gaussian_filter1d from kneed import KneeLocator - from utils.baseAlgorithms import ecdf + from nemere.utils.baseAlgorithms import ecdf sigma = log(len(self.segments))/2 # k, min_samples = self._maximumPositiveCurvature() @@ -2775,7 +2776,7 @@ def _getDistanceMatrix(distances: Iterable[Tuple[int, int, float]], segmentCount Used in constructor. >>> from tabulate import tabulate - >>> from inference.templates import DistanceCalculator + >>> from nemere.inference.templates import DistanceCalculator >>> testdata = [(3, 3, 0.0), ... (0, 3, 0.80835755871765202), ... (5, 3, 1.0), @@ -2811,7 +2812,7 @@ def _getDistanceMatrix(distances: Iterable[Tuple[int, int, float]], segmentCount """ from tempfile import NamedTemporaryFile from sys import getsizeof - from inference.segmentHandler import matrixFromTpairs + from nemere.inference.segmentHandler import matrixFromTpairs tempfile = NamedTemporaryFile() distancesSwap = numpy.memmap(tempfile.name, dtype=numpy.float16, mode="w+", shape=(segmentCount,segmentCount)) @@ -2846,7 +2847,7 @@ def __testing_generateTestSegmentsWithDuplicates(): :return: List of message segments. """ from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage - from inference.analyzers import Value + from nemere.inference.analyzers import Value bytedata = [ bytes([1, 2, 3, 4]), bytes([2, 3, 4]), diff --git a/src/inference/trackingBIDE.py b/src/nemere/inference/trackingBIDE.py similarity index 99% rename from src/inference/trackingBIDE.py rename to src/nemere/inference/trackingBIDE.py index d36438bd..7f08f01e 100644 --- a/src/inference/trackingBIDE.py +++ b/src/nemere/inference/trackingBIDE.py @@ -1,6 +1,7 @@ -from typing import Hashable, List, Iterable +from typing import Hashable, Iterable import numpy, uuid from tabulate import tabulate + from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage diff --git a/src/utils/__init__.py b/src/nemere/utils/__init__.py similarity index 100% rename from src/utils/__init__.py rename to src/nemere/utils/__init__.py diff --git a/src/utils/baseAlgorithms.py b/src/nemere/utils/baseAlgorithms.py similarity index 97% rename from src/utils/baseAlgorithms.py rename to src/nemere/utils/baseAlgorithms.py index e26877a7..69a65887 100644 --- a/src/utils/baseAlgorithms.py +++ b/src/nemere/utils/baseAlgorithms.py @@ -44,8 +44,8 @@ def tril(arrayIn: numpy.ndarray) -> numpy.ndarray: def generateTestSegments(): from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage - from inference.analyzers import Value - from inference.segments import MessageSegment + from nemere.inference.analyzers import Value + from nemere.inference.segments import MessageSegment bytedata = [ bytes([1, 2, 3, 4]), diff --git a/src/utils/evaluationHelpers.py b/src/nemere/utils/evaluationHelpers.py similarity index 97% rename from src/utils/evaluationHelpers.py rename to src/nemere/utils/evaluationHelpers.py index f6ba2e1f..300131bc 100644 --- a/src/utils/evaluationHelpers.py +++ b/src/nemere/utils/evaluationHelpers.py @@ -2,18 +2,18 @@ Module encapsulating evaluation parameters and helper functions to validate aspects of the NEMESYS and NEMETYL approaches. """ -from typing import Union, Tuple, List, TypeVar, Hashable, Sequence, Callable, Iterable +from typing import TypeVar, Hashable, Sequence, Callable, Iterable from netzob.all import RawMessage from itertools import chain import os, csv, pickle, time -from utils.loader import SpecimenLoader -from validation.dissectorMatcher import MessageComparator, ParsedMessage -from inference.analyzers import * -from inference.segmentHandler import segmentsFromLabels, bcDeltaGaussMessageSegmentation, \ - refinements, charRefinements, segmentsFixed, pcaMocoRefinements -from inference.segments import MessageAnalyzer, TypedSegment, MessageSegment, AbstractSegment -from inference.templates import DistanceCalculator, DelegatingDC, Template, MemmapDC +from nemere.utils.loader import SpecimenLoader +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.inference.analyzers import * +from nemere.inference.segmentHandler import segmentsFromLabels, bcDeltaGaussMessageSegmentation, refinements, \ + segmentsFixed +from nemere.inference.segments import MessageAnalyzer, TypedSegment, MessageSegment, AbstractSegment +from nemere.inference.templates import DistanceCalculator, DelegatingDC, Template, MemmapDC Element = TypeVar('Element') @@ -381,7 +381,7 @@ def plotMultiSegmentLines(segmentGroups: List[Tuple[str, List[Tuple[str, TypedSe :param isInteractive: :return: """ - from visualization.multiPlotter import MultiMessagePlotter + from nemere.visualization.multiPlotter import MultiMessagePlotter mmp = MultiMessagePlotter(specimens, pagetitle, len(segmentGroups), isInteractive=isInteractive) mmp.plotMultiSegmentLines(segmentGroups, colorPerLabel) @@ -390,7 +390,7 @@ def plotMultiSegmentLines(segmentGroups: List[Tuple[str, List[Tuple[str, TypedSe if typeDict: # calculate conciseness, correctness = precision, and recall import os, csv from collections import Counter - from inference.templates import Template + from nemere.inference.templates import Template # mapping from each segment in typeDict to the corresponding cluster and true type, # considering representative templates @@ -527,7 +527,7 @@ def writePerformanceStatistics(specimens, clusterer, algos, def printClusterMergeConditions(clunuAB, alignedFieldClasses, matchingConditions, dc, diff=True): - from inference.templates import Template + from nemere.inference.templates import Template from tabulate import tabulate cluTable = [(clunu, *[fv.bytes.hex() if isinstance(fv, MessageSegment) else @@ -608,9 +608,9 @@ def searchSeqOfSeg(sequence: Sequence[Union[MessageSegment, Sequence[MessageSegm def calcHexDist(hexA, hexB): from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage - from inference.analyzers import Value - from inference.segments import MessageSegment - from inference.templates import DistanceCalculator + from nemere.inference.analyzers import Value + from nemere.inference.segments import MessageSegment + from nemere.inference.templates import DistanceCalculator bytedata = [bytes.fromhex(hexA),bytes.fromhex(hexB)] messages = [RawMessage(bd) for bd in bytedata] diff --git a/src/utils/loader.py b/src/nemere/utils/loader.py similarity index 97% rename from src/utils/loader.py rename to src/nemere/utils/loader.py index 224e5b08..a22299c3 100644 --- a/src/utils/loader.py +++ b/src/nemere/utils/loader.py @@ -7,7 +7,7 @@ from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage from netzob.Model.Vocabulary.Messages.L4NetworkMessage import L4NetworkMessage -from validation.messageParser import ParsingConstants +from nemere.validation.messageParser import ParsingConstants class BaseLoader(object): """ @@ -68,7 +68,7 @@ def __init__(self, pcap: str, layer:int=-1, relativeToIP:bool=False): """ Load the messages from the PCAP file of the given name. - >>> from utils.loader import SpecimenLoader + >>> from nemere.utils.loader import SpecimenLoader >>> sl = SpecimenLoader('../input/random-100-continuous.pcap', layer=0, relativeToIP=True) >>> firstmessage = list(sl.messagePool.items())[0] >>> print(firstmessage[0].data.hex()) # the whole message diff --git a/src/validation/__init__.py b/src/nemere/validation/__init__.py similarity index 100% rename from src/validation/__init__.py rename to src/nemere/validation/__init__.py diff --git a/src/validation/dissectorMatcher.py b/src/nemere/validation/dissectorMatcher.py similarity index 99% rename from src/validation/dissectorMatcher.py rename to src/nemere/validation/dissectorMatcher.py index ba5076e5..dca9c6e7 100644 --- a/src/validation/dissectorMatcher.py +++ b/src/nemere/validation/dissectorMatcher.py @@ -14,9 +14,9 @@ from netzob import all as netzob from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -import visualization.bcolors as bcolors -from validation.messageParser import ParsedMessage, ParsingConstants -from inference.segments import MessageSegment, TypedSegment +import nemere.visualization.bcolors as bcolors +from nemere.validation.messageParser import ParsedMessage, ParsingConstants +from nemere.inference.segments import MessageSegment, TypedSegment class FormatMatchScore(object): @@ -49,7 +49,7 @@ class MessageComparator(object): the parameters layer, relativeToIP, and failOnUndissectable and processes the output to directly know the dissection result. """ - import utils.loader as sl + import nemere.utils.loader as sl __messageCellCache = dict() # type: Dict[(netzob.Symbol, AbstractMessage), List] @@ -210,7 +210,7 @@ def pprint2Interleaved(self, message: AbstractMessage, inferredFieldEnds: List[i Use None to create an open slice (up to the beginning or end of the message). :return: """ - import visualization.bcolors as bc + import nemere.visualization.bcolors as bc l2msg = self.messages[message] tformat = self.dissections[l2msg] @@ -302,7 +302,7 @@ def pprintInterleaved(self, symbols: List[netzob.Symbol]): :param symbols: Inferred symbols """ - import visualization.bcolors as bc + import nemere.visualization.bcolors as bc for symfes in self.__prepareMessagesForPPrint(symbols): for msg, tfe, ife in symfes: diff --git a/src/validation/messageParser.py b/src/nemere/validation/messageParser.py similarity index 99% rename from src/validation/messageParser.py rename to src/nemere/validation/messageParser.py index 18674f35..ef2bda1e 100644 --- a/src/validation/messageParser.py +++ b/src/nemere/validation/messageParser.py @@ -10,7 +10,7 @@ from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage, AbstractMessage -from validation.tsharkConnector import TsharkConnector +from nemere.validation.tsharkConnector import TsharkConnector class ParsingConstants(object): diff --git a/src/validation/netzobFormatMatchScore.py b/src/nemere/validation/netzobFormatMatchScore.py similarity index 98% rename from src/validation/netzobFormatMatchScore.py rename to src/nemere/validation/netzobFormatMatchScore.py index fdd94d13..cc99684b 100644 --- a/src/validation/netzobFormatMatchScore.py +++ b/src/nemere/validation/netzobFormatMatchScore.py @@ -13,8 +13,8 @@ from netzob.Common.Utils.MatrixList import MatrixList from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -from utils.loader import SpecimenLoader -from validation.dissectorMatcher import FormatMatchScore, MessageComparator +from nemere.utils.loader import SpecimenLoader +from nemere.validation.dissectorMatcher import FormatMatchScore, MessageComparator def printFMS( formatmatchmetrics: Dict[Tuple[int, netzob.Symbol, List[tuple]], Tuple[int, int, int, int, int]], diff --git a/src/validation/reportWriter.py b/src/nemere/validation/reportWriter.py similarity index 97% rename from src/validation/reportWriter.py rename to src/nemere/validation/reportWriter.py index 64a8649a..5a6a87b2 100644 --- a/src/validation/reportWriter.py +++ b/src/nemere/validation/reportWriter.py @@ -12,8 +12,8 @@ from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -from utils.loader import SpecimenLoader -from validation.dissectorMatcher import FormatMatchScore, MessageComparator +from nemere.utils.loader import SpecimenLoader +from nemere.validation.dissectorMatcher import FormatMatchScore, MessageComparator def calcScoreStats(scores: Iterable[float]) -> Tuple[float, float, float, float, float]: diff --git a/src/validation/tsharkConnector.py b/src/nemere/validation/tsharkConnector.py similarity index 100% rename from src/validation/tsharkConnector.py rename to src/nemere/validation/tsharkConnector.py diff --git a/src/visualization/__init__.py b/src/nemere/visualization/__init__.py similarity index 100% rename from src/visualization/__init__.py rename to src/nemere/visualization/__init__.py diff --git a/src/visualization/bcolors.py b/src/nemere/visualization/bcolors.py similarity index 100% rename from src/visualization/bcolors.py rename to src/nemere/visualization/bcolors.py diff --git a/src/visualization/distancesPlotter.py b/src/nemere/visualization/distancesPlotter.py similarity index 97% rename from src/visualization/distancesPlotter.py rename to src/nemere/visualization/distancesPlotter.py index 19ad136f..4da6bc03 100644 --- a/src/visualization/distancesPlotter.py +++ b/src/nemere/visualization/distancesPlotter.py @@ -1,6 +1,3 @@ - - - import numpy import matplotlib.pyplot as plt from matplotlib import cm, colors @@ -10,10 +7,10 @@ from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage -from visualization.plotter import MessagePlotter -from utils.loader import SpecimenLoader, BaseLoader -from inference.segments import MessageSegment, TypedSegment -from inference.templates import Template, TypedTemplate, DistanceCalculator +from nemere.visualization.plotter import MessagePlotter +from nemere.utils.loader import BaseLoader +from nemere.inference.segments import MessageSegment, TypedSegment +from nemere.inference.templates import Template, TypedTemplate, DistanceCalculator @@ -52,8 +49,8 @@ def plotManifoldDistances(self, segments: List[Union[MessageSegment, TypedSegmen given segment overlaying each other besides the distances; they are colored according to the given labels. >>> from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage - >>> from utils.loader import BaseLoader - >>> from inference.analyzers import Value + >>> from nemere.utils.loader import BaseLoader + >>> from nemere.inference.analyzers import Value >>> >>> bytedata = [ ... bytes([1, 2, 3, 4]), @@ -368,7 +365,7 @@ def plotSegmentDistanceDistribution(dc: DistanceCalculator): :param dc: :return: """ - from utils.baseAlgorithms import tril + from nemere.utils.baseAlgorithms import tril statistics = list() cltrs = [[dc.segments[idx] for idx, *rest in cluster] for cluster in dc.groupByLength().values()] for cluster in cltrs: diff --git a/src/visualization/multiPlotter.py b/src/nemere/visualization/multiPlotter.py similarity index 98% rename from src/visualization/multiPlotter.py rename to src/nemere/visualization/multiPlotter.py index 905fd3d7..bfca3169 100644 --- a/src/visualization/multiPlotter.py +++ b/src/nemere/visualization/multiPlotter.py @@ -4,16 +4,17 @@ import matplotlib.pyplot as plt import matplotlib.ticker as ticker -from visualization.plotter import MessagePlotter -from inference.segments import MessageSegment, TypedSegment, MessageAnalyzer from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage +from nemere.visualization.plotter import MessagePlotter +from nemere.inference.segments import MessageSegment, TypedSegment, MessageAnalyzer + class MultiMessagePlotter(MessagePlotter): """ Different methods to plot data of many messages individually in one big figure. """ - from utils.loader import SpecimenLoader + from nemere.utils.loader import SpecimenLoader def __init__(self, specimens: SpecimenLoader, analysisTitle: str, nrows: int, ncols: int=None, @@ -232,7 +233,7 @@ def plotCompareFill(self, analysisResults: List[List[float]], compareValues: Lis MessagePlotter.fillDiffToCompare(ax, analysisResult, compareValue) - from inference.segments import CorrelatedSegment + from nemere.inference.segments import CorrelatedSegment def plotCorrelations(self, correlations: List[CorrelatedSegment]): @@ -242,7 +243,7 @@ def plotCorrelations(self, :param correlations: List of segment correlations. """ import humanhash - from inference.segments import CorrelatedSegment + from nemere.inference.segments import CorrelatedSegment self.plotInEachAx([series.values for series in correlations], MessagePlotter.STYLE_CORRELATION + dict(label='Correlation')) diff --git a/src/visualization/plotter.py b/src/nemere/visualization/plotter.py similarity index 96% rename from src/visualization/plotter.py rename to src/nemere/visualization/plotter.py index 0575bd2a..e2621715 100644 --- a/src/visualization/plotter.py +++ b/src/nemere/visualization/plotter.py @@ -3,14 +3,14 @@ import matplotlib.pyplot as plt -from utils.evaluationHelpers import reportFolder +from nemere.utils.evaluationHelpers import reportFolder class MessagePlotter(object): """ Define basic functions and properties to plot messages. """ - from utils.loader import SpecimenLoader + from nemere.utils.loader import SpecimenLoader STYLE_MAINLINE = { 'linewidth': .6, 'alpha': .6, 'c': 'red' } STYLE_BLUMAINLINE = { 'linewidth': .6, 'alpha': .6, 'c': 'blue'} diff --git a/src/visualization/simplePrint.py b/src/nemere/visualization/simplePrint.py similarity index 96% rename from src/visualization/simplePrint.py rename to src/nemere/visualization/simplePrint.py index 337442d9..54bd14db 100644 --- a/src/visualization/simplePrint.py +++ b/src/nemere/visualization/simplePrint.py @@ -4,10 +4,10 @@ from netzob.Common.Utils.MatrixList import MatrixList from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -from inference.segments import MessageSegment, TypedSegment, AbstractSegment -from inference.fieldTypes import BaseTypeMemento, RecognizedField, RecognizedVariableLengthField -from inference.templates import DistanceCalculator, Template -from visualization import bcolors as bcolors +from nemere.inference.segments import MessageSegment, TypedSegment +from nemere.inference.fieldTypes import BaseTypeMemento, RecognizedField, RecognizedVariableLengthField +from nemere.inference.templates import DistanceCalculator, Template +from nemere.visualization import bcolors as bcolors def printMatrix(lines: Iterable[Iterable], headers: Iterable=None): diff --git a/src/visualization/singlePlotter.py b/src/nemere/visualization/singlePlotter.py similarity index 96% rename from src/visualization/singlePlotter.py rename to src/nemere/visualization/singlePlotter.py index f46ea0b8..330f7618 100644 --- a/src/visualization/singlePlotter.py +++ b/src/nemere/visualization/singlePlotter.py @@ -4,9 +4,9 @@ from netzob.Model.Vocabulary.Symbol import Symbol -from visualization.plotter import MessagePlotter -from validation.dissectorMatcher import MessageComparator -from utils.loader import SpecimenLoader +from nemere.visualization.plotter import MessagePlotter +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.utils.loader import SpecimenLoader # noinspection PyMethodMayBeStatic @@ -83,7 +83,7 @@ def cumulatedFieldEnds(self, fieldEnds: List[List[int]]): def heatMapFieldComparison(self, comparator: MessageComparator, symbols: List[Symbol]): - from validation.dissectorMatcher import DissectorMatcher + from nemere.validation.dissectorMatcher import DissectorMatcher from collections import OrderedDict, Counter matchers = OrderedDict() # type: OrderedDict[symbol, Tuple[List, List, DissectorMatcher]] @@ -199,7 +199,7 @@ def histogramFieldEnds(self, symbols: List[Symbol]): :param symbols: :return: """ - from validation.dissectorMatcher import MessageComparator + from nemere.validation.dissectorMatcher import MessageComparator from collections import Counter from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage diff --git a/src/nemesys.py b/src/nemesys.py index ac5e2263..67449dba 100644 --- a/src/nemesys.py +++ b/src/nemesys.py @@ -10,8 +10,8 @@ from os.path import isfile import IPython -from utils.loader import SpecimenLoader -from inference.segmentHandler import bcDeltaGaussMessageSegmentation, refinements, symbolsFromSegments +from nemere.utils.loader import SpecimenLoader +from nemere.inference.segmentHandler import bcDeltaGaussMessageSegmentation, refinements, symbolsFromSegments diff --git a/src/nemesys_field-deviation-plot.py b/src/nemesys_field-deviation-plot.py index fa4fc546..54667269 100644 --- a/src/nemesys_field-deviation-plot.py +++ b/src/nemesys_field-deviation-plot.py @@ -11,10 +11,10 @@ import matplotlib.pyplot as plt import IPython -from validation.dissectorMatcher import MessageComparator -from utils.loader import SpecimenLoader -from visualization.singlePlotter import SingleMessagePlotter -from inference.segmentHandler import bcDeltaGaussMessageSegmentation, refinements, symbolsFromSegments +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.utils.loader import SpecimenLoader +from nemere.visualization.singlePlotter import SingleMessagePlotter +from nemere.inference.segmentHandler import bcDeltaGaussMessageSegmentation, refinements, symbolsFromSegments debug = False diff --git a/src/nemesys_fms.py b/src/nemesys_fms.py index 2fa2b520..bf2c9574 100644 --- a/src/nemesys_fms.py +++ b/src/nemesys_fms.py @@ -12,14 +12,12 @@ import matplotlib.pyplot as plt import IPython -from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage - -from validation.dissectorMatcher import MessageComparator, FormatMatchScore, DissectorMatcher -from utils.loader import SpecimenLoader -from inference.analyzers import * -from inference.segmentHandler import bcDeltaGaussMessageSegmentation, \ - originalRefinements, baseRefinements, pcaRefinements, pcaMocoRefinements, pcaPcaRefinements, symbolsFromSegments -from validation import reportWriter +from nemere.validation.dissectorMatcher import MessageComparator, FormatMatchScore, DissectorMatcher +from nemere.utils.loader import SpecimenLoader +from nemere.inference.analyzers import * +from nemere.inference.segmentHandler import bcDeltaGaussMessageSegmentation, \ + baseRefinements, symbolsFromSegments +from nemere.validation import reportWriter debug = False @@ -79,7 +77,7 @@ def bcDeltaPlot(bcdg_mmm: List[BitCongruenceDeltaGauss]): :param bcdg_mmm: Example message analysis results to plot. Expects three elements in the list. """ - from visualization.multiPlotter import MultiMessagePlotter + from nemere.visualization.multiPlotter import MultiMessagePlotter fieldEnds = [comparator.fieldEndsPerMessage(bcdg.message) for bcdg in bcdg_mmm] diff --git a/src/nemesys_pca-refinement-iterate-params.py b/src/nemesys_pca-refinement-iterate-params.py index 6bc21318..8a75c73d 100644 --- a/src/nemesys_pca-refinement-iterate-params.py +++ b/src/nemesys_pca-refinement-iterate-params.py @@ -3,24 +3,26 @@ """ import argparse, IPython -from os.path import isfile, basename, join, splitext, exists +from os.path import isfile, basename, join, splitext from os import makedirs -from typing import Union, Any +from typing import Any import matplotlib.pyplot as plt import matplotlib.ticker as ticker import matplotlib.colors as colors from collections import Counter -from inference.templates import DBSCANsegmentClusterer, FieldTypeTemplate, Template, TypedTemplate, FieldTypeContext, \ - ClusterAutoconfException -from inference.segmentHandler import symbolsFromSegments, wobbleSegmentInMessage, isExtendedCharSeq -from inference.formatRefinement import RelocatePCA -from validation import reportWriter -from visualization.distancesPlotter import DistancesPlotter -from visualization.simplePrint import * -from utils.evaluationHelpers import * -from validation.dissectorMatcher import FormatMatchScore -from validation.dissectorMatcher import DissectorMatcher +import numpy + +from nemere.inference.segments import MessageSegment +from nemere.inference.templates import DBSCANsegmentClusterer, FieldTypeTemplate, Template, FieldTypeContext, \ + ClusterAutoconfException, DelegatingDC +from nemere.inference.segmentHandler import symbolsFromSegments, wobbleSegmentInMessage, charRefinements +from nemere.inference.formatRefinement import RelocatePCA +from nemere.validation import reportWriter +from nemere.visualization.distancesPlotter import DistancesPlotter +from nemere.visualization.simplePrint import * +from nemere.utils.evaluationHelpers import * +from nemere.validation.dissectorMatcher import DissectorMatcher debug = False @@ -248,7 +250,7 @@ def wobble(interestingClusters): wobClusterer.eps = wobClusterer.eps * 0.8 wobNoise, *wobClusters = wobClusterer.clusterSimilarSegments(False) - from utils.baseAlgorithms import tril + from nemere.utils.baseAlgorithms import tril print("Wobbled cluster distances:") print(tabulate([(clunu, wobDC.distancesSubset(wobclu).max(), tril(wobDC.distancesSubset(wobclu)).mean()) for clunu, wobclu in enumerate(wobClusters)], @@ -342,7 +344,7 @@ def mostCommonValues(): # frequent values with more of those of the most frequent values) # # # # # # # # # # # # # # # # # # # # # # # # -def commonBoundsIrrelevant(): +def commonBoundsIrrelevant(relevantSubclusters): # # # # # # # # # # # # # # # # # # # # # # # # # Common Bounds refinement for NON-interestingClusters and NON-relevantSubclusters print("# "*20) diff --git a/src/nemesys_pca-refinement.py b/src/nemesys_pca-refinement.py index b8ea0d26..541c4bb4 100644 --- a/src/nemesys_pca-refinement.py +++ b/src/nemesys_pca-refinement.py @@ -5,23 +5,25 @@ import argparse, IPython from os.path import isfile, basename, join, splitext, exists from os import makedirs -from typing import Union, Any +from typing import Any import matplotlib.pyplot as plt import matplotlib.ticker as ticker import matplotlib.colors as colors from collections import Counter - -from inference.templates import DBSCANsegmentClusterer, FieldTypeTemplate, Template, TypedTemplate, FieldTypeContext, \ - ClusterAutoconfException -from inference.segmentHandler import symbolsFromSegments, wobbleSegmentInMessage, isExtendedCharSeq, \ - originalRefinements, baseRefinements, pcaRefinements, pcaPcaRefinements, zeroBaseRefinements, nemetylRefinements -from inference.formatRefinement import RelocatePCA, CropDistinct, CropChars, BlendZeroSlices, SplitFixed -from validation import reportWriter -from visualization.distancesPlotter import DistancesPlotter -from visualization.simplePrint import * -from utils.evaluationHelpers import * -from validation.dissectorMatcher import FormatMatchScore -from validation.dissectorMatcher import DissectorMatcher +import numpy + +from nemere.inference.segments import MessageSegment +from nemere.inference.templates import DBSCANsegmentClusterer, FieldTypeTemplate, Template, FieldTypeContext, \ + ClusterAutoconfException, DelegatingDC, MemmapDC +from nemere.inference.segmentHandler import symbolsFromSegments, wobbleSegmentInMessage, isExtendedCharSeq, \ + originalRefinements, baseRefinements, pcaRefinements, pcaPcaRefinements, zeroBaseRefinements, nemetylRefinements, \ + charRefinements +from nemere.inference.formatRefinement import RelocatePCA, CropDistinct, CropChars, BlendZeroSlices, SplitFixed +from nemere.validation import reportWriter +from nemere.visualization.distancesPlotter import DistancesPlotter +from nemere.visualization.simplePrint import * +from nemere.utils.evaluationHelpers import * +from nemere.validation.dissectorMatcher import DissectorMatcher debug = False @@ -261,7 +263,7 @@ def wobble(interestingClusters): wobClusterer.eps = wobClusterer.eps * 0.8 wobNoise, *wobClusters = wobClusterer.clusterSimilarSegments(False) - from utils.baseAlgorithms import tril + from nemere.utils.baseAlgorithms import tril print("Wobbled cluster distances:") print(tabulate([(clunu, wobDC.distancesSubset(wobclu).max(), tril(wobDC.distancesSubset(wobclu)).mean()) for clunu, wobclu in enumerate(wobClusters)], diff --git a/src/nemetyl_align-segments.py b/src/nemetyl_align-segments.py index 872e0711..eb160231 100644 --- a/src/nemetyl_align-segments.py +++ b/src/nemetyl_align-segments.py @@ -10,19 +10,22 @@ import argparse, IPython from os.path import isfile, splitext, basename, exists, join +import numpy from tabulate import tabulate # # change drawing toolkit for matplotlib (to be set before the pyplot import) # import matplotlib as mpl # mpl.use('Agg') -from alignment.alignMessages import SegmentedMessages -from inference.segmentHandler import originalRefinements, baseRefinements, pcaRefinements -from alignment.hirschbergAlignSegments import HirschbergOnSegmentSimilarity -from utils.evaluationHelpers import * -from visualization.multiPlotter import MultiMessagePlotter -from alignment.clusterMerging import ClusterMerger -from utils.baseAlgorithms import ecdf +from nemere.alignment.alignMessages import SegmentedMessages +from nemere.inference.segmentHandler import originalRefinements, baseRefinements, pcaRefinements, pcaMocoRefinements +from nemere.alignment.hirschbergAlignSegments import HirschbergOnSegmentSimilarity +from nemere.inference.segments import MessageSegment +from nemere.inference.templates import DistanceCalculator +from nemere.utils.evaluationHelpers import * +from nemere.visualization.multiPlotter import MultiMessagePlotter +from nemere.alignment.clusterMerging import ClusterMerger +from nemere.utils.baseAlgorithms import ecdf debug = False @@ -513,7 +516,7 @@ def epsautoconfeval(epsilon): if withplots: # plot distances and message clusters print("Plot distances...") - from visualization.distancesPlotter import DistancesPlotter + from nemere.visualization.distancesPlotter import DistancesPlotter dp = DistancesPlotter(specimens, 'message-distances-' + plotTitle, False) dp.plotManifoldDistances( [specimens.messagePool[seglist[0].message] for seglist in segmentedMessages], @@ -552,7 +555,7 @@ def epsautoconfeval(epsilon): # split clusters based on fields without rare values # # # # # # # # # # # # # # # # # # # # # # # # if not args.split: - from alignment.clusterSplitting import * + from nemere.alignment.clusterSplitting import * cSplitter = RelaxedExoticClusterSplitter(6 if not tokenizer == "tshark" else 3, alignedClusters, messageClusters, sm) @@ -567,7 +570,7 @@ def epsautoconfeval(epsilon): if withplots: # plot distances and message clusters print("Plot distances...") - from visualization.distancesPlotter import DistancesPlotter + from nemere.visualization.distancesPlotter import DistancesPlotter dp = DistancesPlotter(specimens, 'message-distances-' + plotTitle + '-split', False) dp.plotManifoldDistances( @@ -750,7 +753,7 @@ def epsautoconfeval(epsilon): if withplots: # plot distances and message clusters print("Plot distances...") - from visualization.distancesPlotter import DistancesPlotter + from nemere.visualization.distancesPlotter import DistancesPlotter dp = DistancesPlotter(specimens, 'message-distances-' + plotTitle + '-split', False) dp.plotManifoldDistances( @@ -793,7 +796,7 @@ def epsautoconfeval(epsilon): # # # # # # # # # # # # # # # # # # # # # # # # # TODO fully integrate into/encapsulate in ClusterMerger class print("Check for cluster merge candidates...") - from utils.evaluationHelpers import printClusterMergeConditions + from nemere.utils.evaluationHelpers import printClusterMergeConditions # noinspection PyUnreachableCode if True: # ClusterMerger @@ -850,7 +853,7 @@ def epsautoconfeval(epsilon): comparator) from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage - from visualization.distancesPlotter import DistancesPlotter + from nemere.visualization.distancesPlotter import DistancesPlotter typedClusterDummys = list() for clunu in clusterclusterer.clusterOrder: clusta = None diff --git a/src/nemezero_pca-refinement.py b/src/nemezero_pca-refinement.py index bdc2d8ad..0a2cb073 100644 --- a/src/nemezero_pca-refinement.py +++ b/src/nemezero_pca-refinement.py @@ -5,23 +5,27 @@ import argparse, IPython from os.path import isfile, basename, join, splitext, exists from os import makedirs -from typing import Union, Any +from typing import Any import matplotlib.pyplot as plt import matplotlib.ticker as ticker import matplotlib.colors as colors from collections import Counter -from inference.templates import DBSCANsegmentClusterer, FieldTypeTemplate, Template, TypedTemplate, FieldTypeContext, \ - ClusterAutoconfException -from inference.segmentHandler import symbolsFromSegments, wobbleSegmentInMessage, isExtendedCharSeq, \ - originalRefinements, baseRefinements, pcaRefinements, pcaPcaRefinements, zeroBaseRefinements -from inference.formatRefinement import RelocatePCA, CropDistinct, BlendZeroSlices, CropChars, SplitFixed -from validation import reportWriter -from visualization.distancesPlotter import DistancesPlotter -from visualization.simplePrint import * -from utils.evaluationHelpers import * -from validation.dissectorMatcher import FormatMatchScore -from validation.dissectorMatcher import DissectorMatcher +import numpy + +from nemere.inference.analyzers import Value +from nemere.inference.segments import MessageSegment +from nemere.inference.templates import DBSCANsegmentClusterer, Template, FieldTypeContext, \ + ClusterAutoconfException, DistanceCalculator, MemmapDC +from nemere.inference.segmentHandler import symbolsFromSegments, isExtendedCharSeq, \ + baseRefinements, pcaRefinements, pcaPcaRefinements +from nemere.inference.formatRefinement import RelocatePCA, CropDistinct, BlendZeroSlices, CropChars, SplitFixed +from nemere.utils.loader import SpecimenLoader +from nemere.validation import reportWriter +from nemere.visualization.simplePrint import * +from nemere.utils.evaluationHelpers import * +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.validation.dissectorMatcher import DissectorMatcher debug = False diff --git a/src/netzob_fms.py b/src/netzob_fms.py index 8c29f35e..8315505d 100644 --- a/src/netzob_fms.py +++ b/src/netzob_fms.py @@ -23,10 +23,10 @@ from netzob import all as netzob from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -from utils.loader import SpecimenLoader -from validation.messageParser import ParsedMessage -from validation.dissectorMatcher import MessageComparator, DissectorMatcher -import validation.netzobFormatMatchScore as FMS +from nemere.utils.loader import SpecimenLoader +from nemere.validation.messageParser import ParsedMessage +from nemere.validation.dissectorMatcher import MessageComparator, DissectorMatcher +import nemere.validation.netzobFormatMatchScore as FMS debug = True diff --git a/src/netzob_messagetypes.py b/src/netzob_messagetypes.py index eb7a4337..9f11c5b5 100644 --- a/src/netzob_messagetypes.py +++ b/src/netzob_messagetypes.py @@ -16,19 +16,15 @@ from os.path import isfile, join from typing import Dict, Tuple, List -import matplotlib.colors -import matplotlib.pyplot as plt -import IPython - from netzob import all as netzob from netzob.Model.Vocabulary.Messages.AbstractMessage import AbstractMessage -import utils.evaluationHelpers as eh -from inference.segments import HelperSegment -from inference.analyzers import NoneAnalysis -from utils.loader import SpecimenLoader -from validation.messageParser import ParsedMessage -from validation.dissectorMatcher import MessageComparator +import nemere.utils.evaluationHelpers as eh +from nemere.inference.segments import HelperSegment +from nemere.inference.analyzers import NoneAnalysis +from nemere.utils.loader import SpecimenLoader +from nemere.validation.messageParser import ParsedMessage +from nemere.validation.dissectorMatcher import MessageComparator debug = True diff --git a/src/prep_deduplicate-trace.py b/src/prep_deduplicate-trace.py index c02d926a..550d5aff 100644 --- a/src/prep_deduplicate-trace.py +++ b/src/prep_deduplicate-trace.py @@ -4,14 +4,17 @@ This way, generates comparable traces as evaluation input. """ -import logging # hide warnings of scapy: https://stackoverflow.com/questions/24812604/hide-scapy-warning-message-ipv6 -logging.getLogger("scapy.runtime").setLevel(logging.ERROR) -from scapy.all import * import argparse from os.path import exists,isfile,splitext from collections import OrderedDict -from validation.messageParser import ParsingConstants +import logging # hide warnings of scapy: https://stackoverflow.com/questions/24812604/hide-scapy-warning-message-ipv6 +logging.getLogger("scapy.runtime").setLevel(logging.ERROR) +from scapy.layers.inet import IP +from scapy.layers.l2 import Ether +from scapy.all import sniff, wrpcap + +from nemere.validation.messageParser import ParsingConstants PACKET_LIMIT = 1000 diff --git a/src/prep_filter-maxdiff-trace.py b/src/prep_filter-maxdiff-trace.py index 2bd4e7fe..4a5171ff 100644 --- a/src/prep_filter-maxdiff-trace.py +++ b/src/prep_filter-maxdiff-trace.py @@ -23,16 +23,15 @@ import scapy.all as sy from tabulate import tabulate -from netzob.Model.Vocabulary.Messages.RawMessage import RawMessage, AbstractMessage -from netzob.Model.Vocabulary.Messages.L4NetworkMessage import L4NetworkMessage -from netzob.Model.Vocabulary.Messages.L3NetworkMessage import L3NetworkMessage -from netzob.Model.Vocabulary.Messages.L2NetworkMessage import L2NetworkMessage - -from inference.segmentHandler import bcDeltaGaussMessageSegmentation, zeroBaseRefinements, MessageSegment, DelegatingDC -from inference.analyzers import Value -from utils.loader import SpecimenLoader, BaseLoader -from validation.dissectorMatcher import MessageComparator -from validation.messageParser import ParsedMessage +from netzob.Model.Vocabulary.Messages.RawMessage import AbstractMessage + +from nemere.inference.segmentHandler import bcDeltaGaussMessageSegmentation, zeroBaseRefinements +from nemere.inference.segments import MessageSegment +from nemere.inference.templates import DelegatingDC +from nemere.inference.analyzers import Value +from nemere.utils.loader import SpecimenLoader, BaseLoader +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.validation.messageParser import ParsedMessage PACKET_LIMIT = 100 sigma = 1.2 diff --git a/src/refine-segmenter.py b/src/refine-segmenter.py index e7c08dc8..0ea3fd3d 100644 --- a/src/refine-segmenter.py +++ b/src/refine-segmenter.py @@ -5,12 +5,12 @@ import IPython -from inference.segmentHandler import originalRefinements, baseRefinements, pcaPcaRefinements, pcaMocoRefinements, \ - isExtendedCharSeq, symbolsFromSegments -from inference.segments import MessageAnalyzer, MessageSegment -from validation import reportWriter -from validation.dissectorMatcher import DissectorMatcher -from utils.evaluationHelpers import analyses, cacheAndLoadDC, annotateFieldTypes, reportFolder +from nemere.inference.segmentHandler import originalRefinements, baseRefinements, pcaPcaRefinements, \ + pcaMocoRefinements, isExtendedCharSeq, symbolsFromSegments +from nemere.inference.segments import MessageSegment +from nemere.validation import reportWriter +from nemere.validation.dissectorMatcher import DissectorMatcher +from nemere.utils.evaluationHelpers import analyses, cacheAndLoadDC, annotateFieldTypes, reportFolder debug = False diff --git a/src/test_segment-refinements.py b/src/test_segment-refinements.py index f275e3a7..eb0395dd 100644 --- a/src/test_segment-refinements.py +++ b/src/test_segment-refinements.py @@ -15,24 +15,18 @@ from tabulate import tabulate import IPython -import inference.segmentHandler as sh -import utils.evaluationHelpers -from inference.formatRefinement import MergeConsecutiveChars, ResplitConsecutiveChars,\ - CropDistinct, CumulativeCharMerger, SplitFixed, RelocateZeros -from utils.loader import SpecimenLoader -from inference.analyzers import * -from inference.segments import MessageSegment, TypedSegment -from inference.templates import DistanceCalculator, DBSCANsegmentClusterer -from validation.dissectorMatcher import MessageComparator, FormatMatchScore, DissectorMatcher -from validation import reportWriter -from visualization.multiPlotter import MultiMessagePlotter -from visualization.distancesPlotter import DistancesPlotter - -from characterize_fieldtypes import analyses, labelForSegment -from utils.evaluationHelpers import plotMultiSegmentLines, sigmapertrace -from inference.segmentHandler import filterSegments, symbolsFromSegments - +import nemere.inference.segmentHandler as sh +from nemere.inference.formatRefinement import MergeConsecutiveChars, ResplitConsecutiveChars, CumulativeCharMerger +from nemere.utils.loader import SpecimenLoader +from nemere.inference.analyzers import * +from nemere.inference.segments import MessageSegment +from nemere.validation.dissectorMatcher import MessageComparator, DissectorMatcher +from nemere.validation import reportWriter +from nemere.visualization.multiPlotter import MultiMessagePlotter +from nemere.utils.evaluationHelpers import sigmapertrace +from nemere.inference.segmentHandler import symbolsFromSegments +from characterize_fieldtypes import analyses def removeIdenticalLabels(plt): @@ -288,8 +282,8 @@ def plotLinesSegmentValues(segments: List[Tuple[str, MessageSegment]]): if args.interactive: - import visualization.simplePrint as sp - import utils.evaluationHelpers as eh + import nemere.visualization.simplePrint as sp + import nemere.utils.evaluationHelpers as eh IPython.embed() exit() diff --git a/src/transform_cluster-statistics.py b/src/transform_cluster-statistics.py index dd2b307f..89a4695d 100644 --- a/src/transform_cluster-statistics.py +++ b/src/transform_cluster-statistics.py @@ -9,9 +9,9 @@ import csv, os from tabulate import tabulate -from typing import Dict, Tuple, List +from typing import Dict, List -from utils.evaluationHelpers import reportFolder, scStatsFile +from nemere.utils.evaluationHelpers import reportFolder, scStatsFile cols = [ # 0 1 2 3 4 5 6 7 diff --git a/src/visualize_fieldtype_separation.py b/src/visualize_fieldtype_separation.py index 74b693ae..5449d85e 100644 --- a/src/visualize_fieldtype_separation.py +++ b/src/visualize_fieldtype_separation.py @@ -13,15 +13,14 @@ from itertools import chain from os.path import isfile -from inference.analyzers import * -from inference.segmentHandler import segments2types, filterSegments -from inference.segments import TypedSegment -from inference.templates import DistanceCalculator, Template, TemplateGenerator, DelegatingDC, MemmapDC -from utils.evaluationHelpers import annotateFieldTypes -from utils.loader import SpecimenLoader -from validation.dissectorMatcher import MessageComparator -from visualization.distancesPlotter import DistancesPlotter -from visualization.multiPlotter import MultiMessagePlotter +from nemere.inference.analyzers import * +from nemere.inference.segmentHandler import segments2types +from nemere.inference.templates import Template, TemplateGenerator, MemmapDC +from nemere.utils.evaluationHelpers import annotateFieldTypes +from nemere.utils.loader import SpecimenLoader +from nemere.validation.dissectorMatcher import MessageComparator +from nemere.visualization.distancesPlotter import DistancesPlotter +from nemere.visualization.multiPlotter import MultiMessagePlotter debug = False