diff --git a/.flake8 b/.flake8 index 96a1bcff..d7afb7d1 100644 --- a/.flake8 +++ b/.flake8 @@ -24,3 +24,4 @@ ignore = ANN101 per-file-ignores = scripts/*:T201 + scripts/benchmark_pdf_performance*:JS101,T201 diff --git a/VERSION b/VERSION index 7c327287..61618788 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.1 \ No newline at end of file +2.2 \ No newline at end of file diff --git a/dedoc/api/schema/table_metadata.py b/dedoc/api/schema/table_metadata.py index 779af066..53299a16 100644 --- a/dedoc/api/schema/table_metadata.py +++ b/dedoc/api/schema/table_metadata.py @@ -10,3 +10,4 @@ class TableMetadata(BaseModel): page_id: Optional[int] = Field(description="Number of the page where the table starts", example=0) uid: str = Field(description="Unique identifier of the table", example="e8ba5523-8546-4804-898c-2f4835a1804f") rotated_angle: float = Field(description="Value of the rotation angle (in degrees) by which the table was rotated during recognition", example=1.0) + title: str = Field(description="Table's title") diff --git a/dedoc/api/web/index.html b/dedoc/api/web/index.html index 6a947230..5ca05cec 100644 --- a/dedoc/api/web/index.html +++ b/dedoc/api/web/index.html @@ -37,6 +37,7 @@

Type of document structure parsing

+ document_type

diff --git a/dedoc/attachments_extractors/concrete_attachments_extractors/docx_attachments_extractor.py b/dedoc/attachments_extractors/concrete_attachments_extractors/docx_attachments_extractor.py index 1c307409..8919c890 100644 --- a/dedoc/attachments_extractors/concrete_attachments_extractors/docx_attachments_extractor.py +++ b/dedoc/attachments_extractors/concrete_attachments_extractors/docx_attachments_extractor.py @@ -29,7 +29,7 @@ def can_extract(self, """ Checks if this extractor can get attachments from the document (it should have .docx extension) """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in recognized_extensions.docx_like_format or mime in recognized_mimes.docx_like_format def extract(self, file_path: str, parameters: Optional[dict] = None) -> List[AttachedFile]: diff --git a/dedoc/attachments_extractors/concrete_attachments_extractors/excel_attachments_extractor.py b/dedoc/attachments_extractors/concrete_attachments_extractors/excel_attachments_extractor.py index cf5cfefa..96834db6 100644 --- a/dedoc/attachments_extractors/concrete_attachments_extractors/excel_attachments_extractor.py +++ b/dedoc/attachments_extractors/concrete_attachments_extractors/excel_attachments_extractor.py @@ -22,7 +22,7 @@ def can_extract(self, """ Checks if this extractor can get attachments from the document (it should have .xlsx extension) """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in recognized_extensions.excel_like_format or mime in recognized_mimes.excel_like_format def extract(self, file_path: str, parameters: Optional[dict] = None) -> List[AttachedFile]: diff --git 
a/dedoc/attachments_extractors/concrete_attachments_extractors/json_attachment_extractor.py b/dedoc/attachments_extractors/concrete_attachments_extractors/json_attachment_extractor.py index 39e11c69..83fd572a 100644 --- a/dedoc/attachments_extractors/concrete_attachments_extractors/json_attachment_extractor.py +++ b/dedoc/attachments_extractors/concrete_attachments_extractors/json_attachment_extractor.py @@ -22,7 +22,7 @@ def can_extract(self, """ Checks if this extractor can get attachments from the document (it should have .json extension) """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower().endswith(".json") def extract(self, file_path: str, parameters: Optional[dict] = None) -> List[AttachedFile]: diff --git a/dedoc/attachments_extractors/concrete_attachments_extractors/pdf_attachments_extractor.py b/dedoc/attachments_extractors/concrete_attachments_extractors/pdf_attachments_extractor.py index 0ae13fb4..e4a53c74 100644 --- a/dedoc/attachments_extractors/concrete_attachments_extractors/pdf_attachments_extractor.py +++ b/dedoc/attachments_extractors/concrete_attachments_extractors/pdf_attachments_extractor.py @@ -9,7 +9,7 @@ from dedoc.attachments_extractors.abstract_attachment_extractor import AbstractAttachmentsExtractor from dedoc.data_structures.attached_file import AttachedFile -from dedoc.extensions import recognized_extensions, recognized_mimes +from dedoc.extensions import recognized_mimes from dedoc.utils.utils import convert_datetime, get_mime_extension, get_unique_name @@ -28,8 +28,8 @@ def can_extract(self, """ Checks if this extractor can get attachments from the document (it should have .pdf extension) """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) - return extension.lower() in recognized_extensions.docx_like_format or mime in 
recognized_mimes.docx_like_format + mime, _ = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + return mime in recognized_mimes.pdf_like_format def extract(self, file_path: str, parameters: Optional[dict] = None) -> List[AttachedFile]: """ diff --git a/dedoc/attachments_extractors/concrete_attachments_extractors/pptx_attachments_extractor.py b/dedoc/attachments_extractors/concrete_attachments_extractors/pptx_attachments_extractor.py index 34acdef4..4b9ecb54 100644 --- a/dedoc/attachments_extractors/concrete_attachments_extractors/pptx_attachments_extractor.py +++ b/dedoc/attachments_extractors/concrete_attachments_extractors/pptx_attachments_extractor.py @@ -22,7 +22,7 @@ def can_extract(self, """ Checks if this extractor can get attachments from the document (it should have .pptx extension) """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in recognized_extensions.pptx_like_format or mime in recognized_mimes.pptx_like_format def extract(self, file_path: str, parameters: Optional[dict] = None) -> List[AttachedFile]: diff --git a/dedoc/attachments_handler/attachments_handler.py b/dedoc/attachments_handler/attachments_handler.py index 1017ad45..1935e5d2 100644 --- a/dedoc/attachments_handler/attachments_handler.py +++ b/dedoc/attachments_handler/attachments_handler.py @@ -4,10 +4,10 @@ import time from typing import List, Optional -from dedoc.attachments_extractors import AbstractAttachmentsExtractor from dedoc.common.exceptions.dedoc_error import DedocError from dedoc.data_structures import AttachedFile, DocumentMetadata, ParsedDocument from dedoc.data_structures.unstructured_document import UnstructuredDocument +from dedoc.utils.parameter_utils import get_param_with_attachments from dedoc.utils.utils import get_empty_content @@ -39,11 +39,11 @@ def handle_attachments(self, 
document_parser: "DedocManager", document: Unstruct are important, look to the API parameters documentation for more details). :return: list of parsed document attachments """ - parsed_attachment_files = [] + attachments = [] recursion_deep_attachments = int(parameters.get("recursion_deep_attachments", 10)) - 1 - if not AbstractAttachmentsExtractor.with_attachments(parameters) or recursion_deep_attachments < 0: - return parsed_attachment_files + if not get_param_with_attachments(parameters) or recursion_deep_attachments < 0: + return attachments previous_log_time = time.time() @@ -73,8 +73,8 @@ def handle_attachments(self, document_parser: "DedocManager", document: Unstruct parsed_file = self.__get_empty_document(document_parser=document_parser, attachment=attachment, parameters=parameters_copy) parsed_file.metadata.set_uid(attachment.uid) - parsed_attachment_files.append(parsed_file) - return parsed_attachment_files + attachments.append(parsed_file) + return attachments def __get_empty_document(self, document_parser: "DedocManager", attachment: AttachedFile, parameters: dict) -> ParsedDocument: # noqa metadata = document_parser.document_metadata_extractor.extract( diff --git a/dedoc/config.py b/dedoc/config.py index f3c374eb..06d98894 100644 --- a/dedoc/config.py +++ b/dedoc/config.py @@ -43,7 +43,10 @@ # TESSERACT OCR confidence threshold ( values: [-1 - undefined; 0.0 : 100.0 % - confidence value) ocr_conf_threshold=40.0, # max depth of document structure tree - recursion_deep_subparagraphs=30 + recursion_deep_subparagraphs=30, + + # -------------------------------------------EXTERNAL SERVICES SETTINGS--------------------------------------------- + grobid_max_connection_attempts=3 ) diff --git a/dedoc/converters/concrete_converters/binary_converter.py b/dedoc/converters/concrete_converters/binary_converter.py index 46142cff..ba7741cf 100644 --- a/dedoc/converters/concrete_converters/binary_converter.py +++ 
b/dedoc/converters/concrete_converters/binary_converter.py @@ -23,7 +23,7 @@ def can_convert(self, """ Checks if the document is image-like (e.g. it has .bmp, .jpg, .tiff, etc. extension) and has `mime=application/octet-stream`. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return mime == "application/octet-stream" and extension in supported_image_types def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: diff --git a/dedoc/converters/concrete_converters/docx_converter.py b/dedoc/converters/concrete_converters/docx_converter.py index 3b50416a..ad8855ec 100644 --- a/dedoc/converters/concrete_converters/docx_converter.py +++ b/dedoc/converters/concrete_converters/docx_converter.py @@ -22,7 +22,7 @@ def can_convert(self, """ Checks if the document is docx-like, e.g. it has .doc, .rtf or .odt extension. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in converted_extensions.docx_like_format or mime in converted_mimes.docx_like_format def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: diff --git a/dedoc/converters/concrete_converters/excel_converter.py b/dedoc/converters/concrete_converters/excel_converter.py index 1396a12b..8aaa8809 100644 --- a/dedoc/converters/concrete_converters/excel_converter.py +++ b/dedoc/converters/concrete_converters/excel_converter.py @@ -22,7 +22,7 @@ def can_convert(self, """ Checks if the document is xlsx-like, e.g. it has .xls or .ods extension. 
""" - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in converted_extensions.excel_like_format or mime in converted_mimes.excel_like_format def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: diff --git a/dedoc/converters/concrete_converters/pdf_converter.py b/dedoc/converters/concrete_converters/pdf_converter.py index f0b929e8..01483d4c 100644 --- a/dedoc/converters/concrete_converters/pdf_converter.py +++ b/dedoc/converters/concrete_converters/pdf_converter.py @@ -22,7 +22,7 @@ def can_convert(self, """ Checks if the document is pdf-like, e.g. it has .djvu extension. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in converted_extensions.pdf_like_format or mime in converted_mimes.pdf_like_format def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: diff --git a/dedoc/converters/concrete_converters/png_converter.py b/dedoc/converters/concrete_converters/png_converter.py index cb50245d..6044c970 100644 --- a/dedoc/converters/concrete_converters/png_converter.py +++ b/dedoc/converters/concrete_converters/png_converter.py @@ -25,7 +25,7 @@ def can_convert(self, """ Checks if the document is image-like, e.g. it has .bmp, .jpg, .tiff, etc. extension. 
""" - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in converted_extensions.image_like_format or mime in converted_mimes.image_like_format def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: diff --git a/dedoc/converters/concrete_converters/pptx_converter.py b/dedoc/converters/concrete_converters/pptx_converter.py index d1e7aec3..afce2b94 100644 --- a/dedoc/converters/concrete_converters/pptx_converter.py +++ b/dedoc/converters/concrete_converters/pptx_converter.py @@ -22,7 +22,7 @@ def can_convert(self, """ Checks if the document is pptx-like, e.g. it has .ppt or .odp extension. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in converted_extensions.pptx_like_format or mime in converted_mimes.pptx_like_format def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: diff --git a/dedoc/converters/concrete_converters/txt_converter.py b/dedoc/converters/concrete_converters/txt_converter.py index b1543fa0..1f384d59 100644 --- a/dedoc/converters/concrete_converters/txt_converter.py +++ b/dedoc/converters/concrete_converters/txt_converter.py @@ -23,7 +23,7 @@ def can_convert(self, """ Checks if the document is txt-like, e.g. it has .xml extension. 
""" - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in converted_extensions.txt_like_format or mime in converted_mimes.txt_like_format def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: diff --git a/dedoc/converters/converter_composition.py b/dedoc/converters/converter_composition.py index cf12c2ed..63543d4e 100644 --- a/dedoc/converters/converter_composition.py +++ b/dedoc/converters/converter_composition.py @@ -29,7 +29,7 @@ def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: :param parameters: parameters of converting, see :ref:`parameters_description` for more details :return: path of converted file if conversion was executed else path of the original file """ - extension, mime = get_mime_extension(file_path=file_path) + mime, extension = get_mime_extension(file_path=file_path) converted_file_path = file_path for converter in self.converters: diff --git a/dedoc/data_structures/concrete_annotations/__init__.py b/dedoc/data_structures/concrete_annotations/__init__.py index 529acaa0..264abda0 100644 --- a/dedoc/data_structures/concrete_annotations/__init__.py +++ b/dedoc/data_structures/concrete_annotations/__init__.py @@ -15,7 +15,8 @@ from .superscript_annotation import SuperscriptAnnotation from .table_annotation import TableAnnotation from .underlined_annotation import UnderlinedAnnotation +from .reference_annotation import ReferenceAnnotation __all__ = ['AlignmentAnnotation', 'AttachAnnotation', 'BBoxAnnotation', 'BoldAnnotation', 'ColorAnnotation', 'ConfidenceAnnotation', 'IndentationAnnotation', 'ItalicAnnotation', 'LinkedTextAnnotation', 'SizeAnnotation', 'SpacingAnnotation', 'StrikeAnnotation', - 'StyleAnnotation', 'SubscriptAnnotation', 'SuperscriptAnnotation', 'TableAnnotation', 'UnderlinedAnnotation'] + 'StyleAnnotation', 'SubscriptAnnotation', 
'SuperscriptAnnotation', 'TableAnnotation', 'UnderlinedAnnotation', 'ReferenceAnnotation'] diff --git a/dedoc/data_structures/concrete_annotations/reference_annotation.py b/dedoc/data_structures/concrete_annotations/reference_annotation.py new file mode 100644 index 00000000..e629ba8b --- /dev/null +++ b/dedoc/data_structures/concrete_annotations/reference_annotation.py @@ -0,0 +1,43 @@ +from dedoc.data_structures.annotation import Annotation + + +class ReferenceAnnotation(Annotation): + """ + This annotation points to a place in the document text that is a link to another line in the document (for example, another textual line). + + Example of usage for document_type="article" with the example of link on the bibliography_item :class:`~dedoc.data_structures.LineWithMeta`. + + LineWithMeta: + + .. code-block:: python + + LineWithMeta( # the line with the reference annotation + line="As for the PRF, we use the tree-based construction from Goldreich, Goldwasser and Micali [18]", + metadata=LineMetadata(page_id=0, line_id=32), + annotations=[ReferenceAnnotation(start=90, end=92, value="97cfac39-f0e3-11ee-b81c-b88584b4e4a1"), ...] + ) + + other LineWithMeta: + + .. 
code-block:: python + + LineWithMeta( # The line referenced by the previous one + line="some your text (can be empty)", + metadata=LineMetadata( + page_id=10, + line_id=189, + tag_hierarchy_level=HierarchyLevel(level1=2, level2=0, paragraph_type="bibliography_item")), + other_fields={"uid": "97cfac39-f0e3-11ee-b81c-b88584b4e4a1"} + ), + annotations=[] + ) + """ + name = "reference" + + def __init__(self, value: str, start: int, end: int) -> None: + """ + :param value: unique identifier of the line to which this annotation refers + :param start: start of the annotated text with a link + :param end: end of the annotated text with a link + """ + super().__init__(start=start, end=end, name=ReferenceAnnotation.name, value=value, is_mergeable=False) diff --git a/dedoc/data_structures/line_metadata.py b/dedoc/data_structures/line_metadata.py index 504c5110..19b6730a 100644 --- a/dedoc/data_structures/line_metadata.py +++ b/dedoc/data_structures/line_metadata.py @@ -30,9 +30,9 @@ def __init__(self, self.hierarchy_level = hierarchy_level self.page_id = page_id self.line_id = line_id + self.__other_fields = {} if other_fields is not None and len(other_fields) > 0: self.extend_other_fields(other_fields) - self.__other_fields = {} def extend_other_fields(self, new_fields: dict) -> None: """ diff --git a/dedoc/data_structures/line_with_meta.py b/dedoc/data_structures/line_with_meta.py index ca954573..798a1712 100644 --- a/dedoc/data_structures/line_with_meta.py +++ b/dedoc/data_structures/line_with_meta.py @@ -136,7 +136,8 @@ def set_line(self, line: str) -> None: self._line = line def __repr__(self) -> str: - return f"LineWithMeta({self.line[:65]})" + return (f"LineWithMeta({self.line[:65]}, " + f"tagHL={self.metadata.tag_hierarchy_level.level_1, self.metadata.tag_hierarchy_level.level_2, self.metadata.tag_hierarchy_level.line_type})") def __add__(self, other: Union["LineWithMeta", str]) -> "LineWithMeta": assert isinstance(other, (LineWithMeta, str)) diff --git 
a/dedoc/data_structures/table_metadata.py b/dedoc/data_structures/table_metadata.py index a70ab2b4..fc934d9a 100644 --- a/dedoc/data_structures/table_metadata.py +++ b/dedoc/data_structures/table_metadata.py @@ -9,15 +9,17 @@ class TableMetadata(Serializable): """ This class holds the information about table unique identifier, rotation angle (if table has been rotated - for images) and so on. """ - def __init__(self, page_id: Optional[int], uid: Optional[str] = None, rotated_angle: float = 0.0) -> None: + def __init__(self, page_id: Optional[int], uid: Optional[str] = None, rotated_angle: float = 0.0, title: str = "") -> None: """ :param page_id: number of the page where table starts :param uid: unique identifier of the table :param rotated_angle: value of the rotation angle by which the table was rotated during recognition + :param title: table's title """ self.page_id = page_id self.uid = str(uuid.uuid4()) if not uid else uid self.rotated_angle = rotated_angle + self.title = title def to_api_schema(self) -> ApiTableMetadata: - return ApiTableMetadata(uid=self.uid, page_id=self.page_id, rotated_angle=self.rotated_angle) + return ApiTableMetadata(uid=self.uid, page_id=self.page_id, rotated_angle=self.rotated_angle, title=self.title) diff --git a/dedoc/manager_config.py b/dedoc/manager_config.py index 6854c6f4..679db954 100644 --- a/dedoc/manager_config.py +++ b/dedoc/manager_config.py @@ -1,5 +1,7 @@ from typing import Optional +from dedoc.readers.article_reader.article_reader import ArticleReader + def _get_manager_config(config: dict) -> dict: """ @@ -57,6 +59,7 @@ def _get_manager_config(config: dict) -> dict: BinaryConverter(config=config) ] readers = [ + ArticleReader(config=config), DocxReader(config=config), ExcelReader(config=config), PptxReader(config=config), diff --git a/dedoc/readers/__init__.py b/dedoc/readers/__init__.py index 7c6cce29..2d96fdae 100644 --- a/dedoc/readers/__init__.py +++ b/dedoc/readers/__init__.py @@ -1,4 +1,5 @@ from 
.archive_reader.archive_reader import ArchiveReader +from .article_reader.article_reader import ArticleReader from .base_reader import BaseReader from .csv_reader.csv_reader import CSVReader from .docx_reader.docx_reader import DocxReader @@ -17,6 +18,6 @@ from .reader_composition import ReaderComposition from .txt_reader.raw_text_reader import RawTextReader -__all__ = ['ArchiveReader', 'BaseReader', 'CSVReader', 'DocxReader', 'EmailReader', 'ExcelReader', 'HtmlReader', 'JsonReader', 'MhtmlReader', +__all__ = ['ArchiveReader', 'ArticleReader', 'BaseReader', 'CSVReader', 'DocxReader', 'EmailReader', 'ExcelReader', 'HtmlReader', 'JsonReader', 'MhtmlReader', 'NoteReader', 'PptxReader', 'ReaderComposition', 'RawTextReader', 'PdfBaseReader', 'PdfImageReader', 'PdfTabbyReader', 'PdfTxtlayerReader', 'PdfAutoReader'] diff --git a/dedoc/readers/archive_reader/archive_reader.py b/dedoc/readers/archive_reader/archive_reader.py index d8831b58..589014ac 100644 --- a/dedoc/readers/archive_reader/archive_reader.py +++ b/dedoc/readers/archive_reader/archive_reader.py @@ -29,7 +29,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. 
""" - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in recognized_extensions.archive_like_format or mime in recognized_mimes.archive_like_format def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: diff --git a/dedoc/readers/article_reader/__init__.py b/dedoc/readers/article_reader/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dedoc/readers/article_reader/article_reader.py b/dedoc/readers/article_reader/article_reader.py new file mode 100644 index 00000000..f2169452 --- /dev/null +++ b/dedoc/readers/article_reader/article_reader.py @@ -0,0 +1,365 @@ +import os +import time +from typing import Dict, List, Optional, Tuple + +import requests +from bs4 import BeautifulSoup, Tag + +from dedoc.data_structures import Annotation, CellWithMeta, HierarchyLevel, LineMetadata, Table, TableAnnotation, TableMetadata +from dedoc.data_structures.concrete_annotations.reference_annotation import ReferenceAnnotation +from dedoc.data_structures.line_with_meta import LineWithMeta +from dedoc.data_structures.unstructured_document import UnstructuredDocument +from dedoc.extensions import recognized_mimes +from dedoc.readers.base_reader import BaseReader +from dedoc.utils.parameter_utils import get_param_document_type +from dedoc.utils.utils import get_mime_extension + + +class ArticleReader(BaseReader): + """ + This class is used for parsing scientific articles with .pdf extension using `GROBID `_ system. 
+ """ + + def __init__(self, config: Optional[dict] = None) -> None: + super().__init__(config=config) + self.grobid_url = f"http://{os.environ.get('GROBID_HOST', 'localhost')}:{os.environ.get('GROBID_PORT', '8070')}" + self.url = f"{self.grobid_url}/api/processFulltextDocument" + self.grobid_is_alive = False + self.__update_grobid_alive(self.grobid_url, max_attempts=self.config.get("grobid_max_connection_attempts", 3)) + + def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: + """ + The method calls the service GROBID method ``/api/processFulltextDocument`` and analyzes the result (format XML/TEI) of the recognized article + using beautifulsoup library. + As a result, the method fills the class :class:`~dedoc.data_structures.UnstructuredDocument`. + Article reader adds additional information to the `tag_hierarchy_level` of :class:`~dedoc.data_structures.LineMetadata`. + The method extracts information about ``authors``, ``bibliography items``, ``sections``, and ``tables``. + You can find more information about the extracted information from GROBID system on the page :ref:`article_structure`. + + Look to the documentation of :meth:`~dedoc.readers.BaseReader.read` to get information about the method's parameters. + """ + with open(file_path, "rb") as file: + files = {"input": file} + try: + response = requests.post(self.url, files=files) + if response.status_code != 200: + warning = f"GROBID returns code {response.status_code}." + self.logger.warning(warning) + return UnstructuredDocument(tables=[], lines=[], attachments=[], warnings=[warning]) + except requests.exceptions.ConnectionError as ex: + warning = f"GROBID doesn't response. Check GROBID service on {self.url}. 
Exception' msg: {ex}" + self.logger.warning(warning) + return UnstructuredDocument(tables=[], lines=[], attachments=[], warnings=[warning]) + + soup = BeautifulSoup(response.text, features="lxml") + lines = self.__parse_title(soup) + + if soup.biblstruct is not None: + authors = soup.biblstruct.find_all("author") + lines += [line for author in authors for line in self.__parse_author(author)] + + bib_lines, bib2uid = self.__parse_bibliography(soup) + tables, table2uid = self.__parse_tables(soup) + + lines += self.__parse_text(soup, bib2uid, table2uid) + lines.extend(bib_lines) + + return UnstructuredDocument(tables=tables, lines=lines, attachments=[], warnings=["use GROBID (version: 0.8.0)"]) + + def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, extension: Optional[str] = None, parameters: Optional[dict] = None) -> bool: + """ + Check if: + + * the document extension is suitable for this reader (.pdf); + * parameter "document_type" is "article"; + * GROBID service is running on port 8070. + + Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. + """ + if get_param_document_type(parameters) != "article": + return False + + self.__update_grobid_alive(self.grobid_url, max_attempts=1) + if not self.grobid_is_alive: + return False + + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + return mime in recognized_mimes.pdf_like_format and extension.lower() == ".pdf" + + def __update_grobid_alive(self, grobid_url: str, max_attempts: int = 2) -> None: + if self.grobid_is_alive: + return + + attempt = max_attempts + while attempt > 0: + try: + response = requests.get(f"{grobid_url}/api/isalive") + if response.status_code == 200: + self.logger.info(f"GROBID up on {grobid_url}.") + self.grobid_is_alive = True + return + except requests.exceptions.ConnectionError as ex: + self.logger.warning(f"GROBID doesn't response. 
Check GROBID service on {self.url}. Exception's msg: {ex}") + time.sleep(5) + attempt -= 1 + + self.grobid_is_alive = False + + def __get_tag_by_hierarchy_path(self, source: Tag, hierarchy_path: List[str]) -> Optional[str]: + cur_tag = source + for path_item in hierarchy_path: + cur_tag = cur_tag.find(path_item) + if cur_tag is None: + # tag not found + return "" + + return ArticleReader.__tag2text(cur_tag) + + def __create_line(self, text: str, hierarchy_level_id: Optional[int] = None, paragraph_type: Optional[str] = None, + annotations: Optional[List[Annotation]] = None, other_fields: Optional[Dict] = None) -> LineWithMeta: + # TODO check on improve + if other_fields is None: + other_fields = {} + assert text is not None + assert isinstance(text, str) + + if hierarchy_level_id is None or paragraph_type is None: + hierarchy_level = HierarchyLevel.create_raw_text() + else: + hierarchy_level = HierarchyLevel(level_1=hierarchy_level_id, level_2=0, can_be_multiline=False, line_type=paragraph_type) + + return LineWithMeta(line=text, + metadata=LineMetadata(page_id=0, line_id=0, tag_hierarchy_level=hierarchy_level, other_fields=other_fields), + annotations=annotations) + + def __parse_affiliation(self, affiliation_tag: Tag) -> List[LineWithMeta]: + lines = [self.__create_line(text=affiliation_tag.get("key"), hierarchy_level_id=2, paragraph_type="author_affiliation")] + + if affiliation_tag.orgname: + lines.append(self.__create_line(text=self.__tag2text(affiliation_tag.orgname), hierarchy_level_id=3, paragraph_type="org_name")) + + if affiliation_tag.address: + lines.append(self.__create_line(text=affiliation_tag.address.text, hierarchy_level_id=3, paragraph_type="address")) + + return lines + + def __parse_author(self, author_tag: Tag) -> List[LineWithMeta]: + """ + + Example: + + SoniaBelaïd + + École Normale Supérieure +
+ 45 rue dUlm + 75005 + Paris +
+
+ + Thales Communications & Security +
+ 4 Avenue des Louvresses + 92230 + Gennevilliers +
+
+
+ """ + lines = [self.__create_line(text="", hierarchy_level_id=1, paragraph_type="author")] + + first_name = self.__get_tag_by_hierarchy_path(author_tag, ["persname", "forename"]) + if first_name: + lines.append(self.__create_line(text=first_name, hierarchy_level_id=2, paragraph_type="author_first_name")) + + surname = self.__get_tag_by_hierarchy_path(author_tag, ["persname", "surname"]) + if surname: + lines.append(self.__create_line(text=surname, hierarchy_level_id=2, paragraph_type="author_surname")) + + lines += [ + self.__create_line(text=email.get_text(), hierarchy_level_id=3, paragraph_type="email") + for email in author_tag.find_all("email") if email + ] + + affiliations = author_tag.find_all("affiliation") + lines += [line for affiliation in affiliations for line in self.__parse_affiliation(affiliation)] + + return lines + + def __create_line_with_refs(self, content: List[Tuple[str, Tag]], bib2uid: dict, table2uid: dict) -> LineWithMeta: + text = "" + start = 0 + annotations = [] + + for subpart in content: + if isinstance(subpart, Tag) and subpart.name == "ref": + target = subpart.get("target") + sub_text = subpart.string + if subpart.get("type") == "bibr" and target in bib2uid: + annotations.append(ReferenceAnnotation(value=bib2uid[target], start=start, end=start + len(sub_text))) + if subpart.get("type") == "table" and target in table2uid: + annotations.append(TableAnnotation(name=table2uid[target], start=start, end=start + len(sub_text))) + else: + sub_text = subpart if isinstance(subpart, str) else "" + + text += sub_text + start += len(sub_text) + + return self.__create_line(text=text, hierarchy_level_id=None, paragraph_type=None, annotations=annotations) + + def __parse_text(self, soup: Tag, bib2uid: dict, table2uid: dict) -> List[LineWithMeta]: + """ + Example of section XML tag: +
Preprocessing

...

...

+ """ + lines = [] + + abstract = soup.find("abstract").p + lines.append(self.__create_line(text="Abstract", hierarchy_level_id=1, paragraph_type="abstract")) + lines.append(self.__create_line(text=self.__tag2text(abstract))) + + for text in soup.find_all("text"): + for part in text.find_all("div"): + # TODO: Beautifulsoup doesn't read tags from input XML file. WTF! + # As a result we lose section number in text (see example above) + # Need to fix this in the future. + number = part.head.get("n") + " " if part.head else "" + line_text = str(part.contents[0]) if len(part.contents) > 0 else None + if line_text is not None and len(line_text) > 0: + lines.append(self.__create_line(text=number + line_text, hierarchy_level_id=1, paragraph_type="section")) + for subpart in part.find_all("p"): + if subpart.string is not None: + lines.append(self.__create_line_with_refs(subpart.string, bib2uid, table2uid)) + elif subpart.contents and len(subpart.contents) > 0: + lines.append(self.__create_line_with_refs(subpart.contents, bib2uid, table2uid)) + + return lines + + @staticmethod + def __tag2text(tag: Tag) -> str: + return "" if not tag or not tag.string else tag.string + + def __parse_tables(self, soup: Tag) -> Tuple[List[Table], dict]: + """ + Example Table with table's ref: + ----------------------------------------------- + Table Reference Example: + 1 + ... + Table Example: +
+ Table 1 . + + Performance of some illustrative AES implementations. + + Software (8-bit)code sizecyclecostphysical + Implementations(bytes)countfunctionassumptions + Unprotected [13]165945577.560- + ... +
+
+ """ + tables = [] + table2uid = {} + + tag_tables = soup.find_all("figure", {"type": "table"}) + for table in tag_tables: + row_cells = [] + head = table.contents[0] if len(table.contents) > 0 and isinstance(table.contents[0], str) else self.__tag2text(table.head) + title = head + self.__tag2text(table.figdesc) + for row in table.table.find_all("row"): + row_cells.append([CellWithMeta(lines=[self.__create_line(self.__tag2text(cell))]) for cell in row.find_all("cell")]) + tables.append(Table(cells=row_cells, metadata=TableMetadata(page_id=0, title=title))) + table2uid["#" + table.get("xml:id")] = tables[-1].metadata.uid + + return tables, table2uid + + def __parse_bibliography(self, soup: Tag) -> Tuple[List[LineWithMeta], dict]: + """ + Reference Example: + [6] + ... + + + + Leakage-resilient symmetric encryption via re-keying + + MichelAbdalla + + + SoniaBelaïd + + + Pierre-AlainFouque + + + + Bertoni and Coron + + 4 + + + + + + """ + lines = [] + cites = {} # bib_item_grobid_uid: line_uid + + # according GROBID description + level_2_paragraph_type = {"a": "title", "j": "title_journal", "s": "title_series", "m": "title_conference_proceedings"} + + bibliography = soup.find("listbibl", recursive=True) + lines.append(self.__create_line(text="bibliography", hierarchy_level_id=1, paragraph_type="bibliography")) + if not bibliography: + return lines, cites + + bib_items = bibliography.find_all("biblstruct") + if not bib_items: + return lines, cites + + # parse bibliography items + for bib_item in bib_items: + cites["#" + bib_item.get("xml:id")] = lines[-1].uid + lines.append(self.__create_line(text="", hierarchy_level_id=2, paragraph_type="bibliography_item", other_fields={"uid": lines[-1].uid})) + + # parse bib title + for title in bib_item.find_all("title", recursive=True): + if title.get("level"): + paragraph_type = level_2_paragraph_type[title.get("level")] + lines.append(self.__create_line(text=self.__tag2text(title), hierarchy_level_id=3, 
paragraph_type=paragraph_type)) + + lines += [ # parse bib authors + self.__create_line(text=author.get_text(), hierarchy_level_id=3, paragraph_type="author") + for author in bib_item.find_all("author", recursive=True) if author + ] + + lines += [ # parse biblScope + self.__create_line(text=self.__tag2text(bibl_scope), hierarchy_level_id=3, paragraph_type="biblScope_volume") + for bibl_scope in bib_item.find_all("biblscope", {"unit": "volume"}, recursive=True) if bibl_scope + ] + + try: + lines += [ # parse values + self.__create_line(text=f"{bibl_scope.get('from')}-{bibl_scope.get('to')}", hierarchy_level_id=3, paragraph_type="biblScope_page") + for bibl_scope in bib_item.find_all("biblscope", {"unit": "page"}, recursive=True) if bibl_scope + ] + finally: + self.logger.warning("Grobid parsing warning: was non-standard format") + + lines += [ # parse DOI (maybe more one) + self.__create_line(text=self.__tag2text(idno), hierarchy_level_id=3, paragraph_type="DOI") + for idno in bib_item.find_all("idno", recursive=True) if idno + ] + + if bib_item.publisher: + lines.append(self.__create_line(text=self.__tag2text(bib_item.publisher), hierarchy_level_id=3, paragraph_type="publisher")) + + if bib_item.date: + lines.append(self.__create_line(text=self.__tag2text(bib_item.date), hierarchy_level_id=3, paragraph_type="date")) + + return lines, cites + + def __parse_title(self, soup: Tag) -> List[LineWithMeta]: + return [self.__create_line(text=self.__tag2text(soup.title), hierarchy_level_id=0, paragraph_type="root")] diff --git a/dedoc/readers/csv_reader/csv_reader.py b/dedoc/readers/csv_reader/csv_reader.py index d1de64ed..e2e09453 100644 --- a/dedoc/readers/csv_reader/csv_reader.py +++ b/dedoc/readers/csv_reader/csv_reader.py @@ -25,7 +25,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader. 
Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in recognized_extensions.csv_like_format def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: diff --git a/dedoc/readers/docx_reader/docx_reader.py b/dedoc/readers/docx_reader/docx_reader.py index 1e503738..3d4c9028 100644 --- a/dedoc/readers/docx_reader/docx_reader.py +++ b/dedoc/readers/docx_reader/docx_reader.py @@ -7,6 +7,7 @@ from dedoc.extensions import recognized_extensions, recognized_mimes from dedoc.readers.base_reader import BaseReader from dedoc.readers.docx_reader.data_structures.docx_document import DocxDocument +from dedoc.utils.parameter_utils import get_param_with_attachments from dedoc.utils.utils import get_mime_extension @@ -25,7 +26,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in recognized_extensions.docx_like_format or mime in recognized_mimes.docx_like_format def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: @@ -34,9 +35,7 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure This reader is able to add some additional information to the `tag_hierarchy_level` of :class:`~dedoc.data_structures.LineMetadata`. 
Look to the documentation of :meth:`~dedoc.readers.BaseReader.read` to get information about the method's parameters. """ - parameters = {} if parameters is None else parameters - - with_attachments = self.attachment_extractor.with_attachments(parameters=parameters) + with_attachments = get_param_with_attachments(parameters) attachments = self.attachment_extractor.extract(file_path=file_path, parameters=parameters) if with_attachments else [] docx_document = DocxDocument(path=file_path, attachments=attachments, logger=self.logger) diff --git a/dedoc/readers/email_reader/email_reader.py b/dedoc/readers/email_reader/email_reader.py index 7a239e31..448d13e0 100644 --- a/dedoc/readers/email_reader/email_reader.py +++ b/dedoc/readers/email_reader/email_reader.py @@ -33,7 +33,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension or mime is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. 
""" - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return file_path.lower().endswith(".eml") or mime == "message/rfc822" def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: diff --git a/dedoc/readers/excel_reader/excel_reader.py b/dedoc/readers/excel_reader/excel_reader.py index 91501e97..6e882a50 100644 --- a/dedoc/readers/excel_reader/excel_reader.py +++ b/dedoc/readers/excel_reader/excel_reader.py @@ -11,6 +11,7 @@ from dedoc.data_structures.unstructured_document import UnstructuredDocument from dedoc.extensions import recognized_extensions, recognized_mimes from dedoc.readers.base_reader import BaseReader +from dedoc.utils.parameter_utils import get_param_with_attachments from dedoc.utils.utils import get_mime_extension xlrd.xlsx.ensure_elementtree_imported(False, None) @@ -32,7 +33,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in recognized_extensions.excel_like_format or mime in recognized_mimes.excel_like_format def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: @@ -40,14 +41,13 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure This method extracts tables and attachments from the document, `lines` attribute remains empty. Look to the documentation of :meth:`~dedoc.readers.BaseReader.read` to get information about the method's parameters. 
""" - parameters = {} if parameters is None else parameters with xlrd.open_workbook(file_path) as book: sheets_num = book.nsheets tables = [] for sheet_num in range(sheets_num): sheet = book.sheet_by_index(sheet_num) tables.append(self.__parse_sheet(sheet_num, sheet)) - if self.attachment_extractor.with_attachments(parameters=parameters): + if get_param_with_attachments(parameters): attachments = self.attachment_extractor.extract(file_path=file_path, parameters=parameters) else: attachments = [] diff --git a/dedoc/readers/html_reader/html_reader.py b/dedoc/readers/html_reader/html_reader.py index 4a2668bf..83fb2085 100644 --- a/dedoc/readers/html_reader/html_reader.py +++ b/dedoc/readers/html_reader/html_reader.py @@ -33,7 +33,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in [".html", ".shtml"] or mime in ["text/html"] def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: diff --git a/dedoc/readers/json_reader/json_reader.py b/dedoc/readers/json_reader/json_reader.py index f408674f..767542cf 100644 --- a/dedoc/readers/json_reader/json_reader.py +++ b/dedoc/readers/json_reader/json_reader.py @@ -28,7 +28,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader (it has .json extension). Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. 
""" - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower().endswith(".json") def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: diff --git a/dedoc/readers/mhtml_reader/mhtml_reader.py b/dedoc/readers/mhtml_reader/mhtml_reader.py index ea980dec..f08e84e4 100644 --- a/dedoc/readers/mhtml_reader/mhtml_reader.py +++ b/dedoc/readers/mhtml_reader/mhtml_reader.py @@ -32,7 +32,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower().endswith(tuple(self.mhtml_extensions)) def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: diff --git a/dedoc/readers/note_reader/note_reader.py b/dedoc/readers/note_reader/note_reader.py index 836a98bb..e1e15b90 100644 --- a/dedoc/readers/note_reader/note_reader.py +++ b/dedoc/readers/note_reader/note_reader.py @@ -22,7 +22,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. 
""" - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower().endswith(".note.pickle") def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: diff --git a/dedoc/readers/pdf_reader/pdf_auto_reader/pdf_auto_reader.py b/dedoc/readers/pdf_reader/pdf_auto_reader/pdf_auto_reader.py index e2c3ad37..c47e7e7d 100644 --- a/dedoc/readers/pdf_reader/pdf_auto_reader/pdf_auto_reader.py +++ b/dedoc/readers/pdf_reader/pdf_auto_reader/pdf_auto_reader.py @@ -47,7 +47,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, It is recommended to use `pdf_with_text_layer=auto_tabby` because it's faster and allows to get better results. You can look to :ref:`pdf_handling_parameters` to get more information about `parameters` dictionary possible arguments. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) if not (mime in recognized_mimes.pdf_like_format or extension.lower() == ".pdf"): return False diff --git a/dedoc/readers/pdf_reader/pdf_base_reader.py b/dedoc/readers/pdf_reader/pdf_base_reader.py index fd6ed93b..8372fb92 100644 --- a/dedoc/readers/pdf_reader/pdf_base_reader.py +++ b/dedoc/readers/pdf_reader/pdf_base_reader.py @@ -91,19 +91,12 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure lines, scan_tables, attachments, warnings, other_fields = self._parse_document(file_path, params_for_parse) tables = [scan_table.to_table() for scan_table in scan_tables] - if self._can_contain_attachements(file_path) and self.attachment_extractor.with_attachments(parameters): + if param_utils.get_param_with_attachments(parameters) and self.attachment_extractor.can_extract(file_path): attachments += 
self.attachment_extractor.extract(file_path=file_path, parameters=parameters) result = UnstructuredDocument(lines=lines, tables=tables, attachments=attachments, warnings=warnings, metadata=other_fields) return self._postprocess(result) - def _can_contain_attachements(self, path: str) -> bool: - can_contain_attachments = False - mime = get_file_mime_type(path) - if mime in recognized_mimes.pdf_like_format: - can_contain_attachments = True - return can_contain_attachments - def _parse_document(self, path: str, parameters: ParametersForParseDoc) -> ( Tuple)[List[LineWithMeta], List[ScanTable], List[PdfImageAttachment], List[str], Optional[dict]]: first_page = 0 if parameters.first_page is None or parameters.first_page < 0 else parameters.first_page @@ -138,7 +131,7 @@ def _parse_document(self, path: str, parameters: ParametersForParseDoc) -> ( prev_line = None for line in all_lines_with_links: - line.metadata.tag_hierarchy_level = DefaultStructureExtractor.get_list_hl_with_regexp(line, prev_line) + line.metadata.tag_hierarchy_level = DefaultStructureExtractor.get_hl_list_using_regexp(line, prev_line) prev_line = line all_lines_with_paragraphs = self.paragraph_extractor.extract(all_lines_with_links) diff --git a/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py b/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py index fd2cf6ff..53edd2e1 100644 --- a/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py +++ b/dedoc/readers/pdf_reader/pdf_image_reader/pdf_image_reader.py @@ -55,7 +55,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. You can also see :ref:`pdf_handling_parameters` to get more information about `parameters` dictionary possible arguments. 
""" - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return mime in recognized_mimes.pdf_like_format or mime in recognized_mimes.image_like_format or \ file_path.lower().endswith(tuple(recognized_extensions.image_like_format)) or extension.lower().replace(".", "") in supported_image_types diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py index afa15d26..00e5e552 100644 --- a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py +++ b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py @@ -34,7 +34,7 @@ from dedoc.readers.pdf_reader.pdf_base_reader import ParametersForParseDoc, PdfBaseReader from dedoc.structure_extractors.concrete_structure_extractors.default_structure_extractor import DefaultStructureExtractor from dedoc.structure_extractors.feature_extractors.list_features.list_utils import get_dotted_item_depth -from dedoc.utils.parameter_utils import get_param_page_slice, get_param_pdf_with_txt_layer +from dedoc.utils.parameter_utils import get_param_page_slice, get_param_pdf_with_txt_layer, get_param_with_attachments from dedoc.utils.pdf_utils import get_pdf_page_count from dedoc.utils.utils import calculate_file_hash, get_mime_extension, get_unique_name @@ -66,8 +66,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. 
""" - parameters = {} if parameters is None else parameters - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return (mime in recognized_mimes.pdf_like_format or extension.lower().endswith("pdf")) and get_param_pdf_with_txt_layer(parameters) == "tabby" def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: @@ -80,11 +79,10 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure """ parameters = {} if parameters is None else parameters warnings = [] - lines, tables, tables_on_images, image_attachments, document_metadata = self.__extract(path=file_path, parameters=parameters, warnings=warnings) - lines = self.linker.link_objects(lines=lines, tables=tables_on_images, images=image_attachments) + lines, tables, tables_on_images, attachments, document_metadata = self.__extract(path=file_path, parameters=parameters, warnings=warnings) + lines = self.linker.link_objects(lines=lines, tables=tables_on_images, images=attachments) - attachments = image_attachments - if self._can_contain_attachements(file_path) and self.attachment_extractor.with_attachments(parameters): + if get_param_with_attachments(parameters) and self.attachment_extractor.can_extract(file_path): attachments += self.attachment_extractor.extract(file_path=file_path, parameters=parameters) lines = [line for line_group in lines for line in line_group.split("\n")] @@ -266,7 +264,7 @@ def __get_tag(self, line: LineWithMeta, prev_line: Optional[LineWithMeta], line_ return HierarchyLevel(1, header_level, False, line_type) if line_type == "litem": # TODO automatic list depth and merge list items from multiple lines - return DefaultStructureExtractor.get_list_hl_with_regexp(line, prev_line) + return DefaultStructureExtractor.get_hl_list_using_regexp(line, prev_line) return HierarchyLevel(None, None, True, line_type) diff 
--git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py index 86277bf0..97e55a03 100644 --- a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py +++ b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_txtlayer_reader.py @@ -34,8 +34,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. """ - parameters = {} if parameters is None else parameters - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return (mime in recognized_mimes.pdf_like_format or extension.lower().endswith("pdf")) and get_param_pdf_with_txt_layer(parameters) == "true" def _process_one_page(self, diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/tabbypdf/jars/ispras_tbl_extr.jar b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/tabbypdf/jars/ispras_tbl_extr.jar index e8199056..2d22e7c2 100644 Binary files a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/tabbypdf/jars/ispras_tbl_extr.jar and b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/tabbypdf/jars/ispras_tbl_extr.jar differ diff --git a/dedoc/readers/pptx_reader/pptx_reader.py b/dedoc/readers/pptx_reader/pptx_reader.py index e387de46..6dc77551 100644 --- a/dedoc/readers/pptx_reader/pptx_reader.py +++ b/dedoc/readers/pptx_reader/pptx_reader.py @@ -15,6 +15,7 @@ from dedoc.data_structures.unstructured_document import UnstructuredDocument from dedoc.extensions import recognized_extensions, recognized_mimes from dedoc.readers.base_reader import BaseReader +from dedoc.utils.parameter_utils import get_param_with_attachments from dedoc.utils.utils import get_mime_extension @@ -33,7 +34,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = 
None, Check if the document extension is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower() in recognized_extensions.pptx_like_format or mime in recognized_mimes.pptx_like_format def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: @@ -41,9 +42,7 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure The method return document content with all document's lines, tables and attachments. Look to the documentation of :meth:`~dedoc.readers.BaseReader.read` to get information about the method's parameters. """ - parameters = {} if parameters is None else parameters - - with_attachments = self.attachments_extractor.with_attachments(parameters=parameters) + with_attachments = get_param_with_attachments(parameters) attachments = self.attachments_extractor.extract(file_path=file_path, parameters=parameters) if with_attachments else [] attachment_name2uid = {attachment.original_name: attachment.uid for attachment in attachments} diff --git a/dedoc/readers/reader_composition.py b/dedoc/readers/reader_composition.py index 9cf0aec3..2bac6917 100644 --- a/dedoc/readers/reader_composition.py +++ b/dedoc/readers/reader_composition.py @@ -30,7 +30,7 @@ def read(self, file_path: str, parameters: Optional[dict] = None) -> Unstructure :return: intermediate representation of the document with lines, tables and attachments """ file_name = os.path.basename(file_path) - extension, mime = get_mime_extension(file_path=file_path) + mime, extension = get_mime_extension(file_path=file_path) for reader in self.readers: if reader.can_read(file_path=file_path, mime=mime, extension=extension, parameters=parameters): diff --git 
a/dedoc/readers/txt_reader/raw_text_reader.py b/dedoc/readers/txt_reader/raw_text_reader.py index 33ffe656..58bb1164 100644 --- a/dedoc/readers/txt_reader/raw_text_reader.py +++ b/dedoc/readers/txt_reader/raw_text_reader.py @@ -30,7 +30,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, Check if the document extension is suitable for this reader. Look to the documentation of :meth:`~dedoc.readers.BaseReader.can_read` to get information about the method's parameters. """ - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension.lower().endswith((".txt", "txt.gz")) def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: @@ -70,7 +70,7 @@ def _get_lines_with_meta(self, path: str, encoding: str) -> List[LineWithMeta]: indent_annotation = self.__get_indent_annotation(line) line_with_meta = LineWithMeta(line=line, metadata=metadata, annotations=[spacing_annotation, indent_annotation], uid=uid) - line_with_meta.metadata.tag_hierarchy_level = DefaultStructureExtractor.get_list_hl_with_regexp(line_with_meta, prev_line) + line_with_meta.metadata.tag_hierarchy_level = DefaultStructureExtractor.get_hl_list_using_regexp(line_with_meta, prev_line) prev_line = line_with_meta lines.append(line_with_meta) diff --git a/dedoc/structure_extractors/__init__.py b/dedoc/structure_extractors/__init__.py index 2e3e9132..404d915c 100644 --- a/dedoc/structure_extractors/__init__.py +++ b/dedoc/structure_extractors/__init__.py @@ -1,12 +1,14 @@ from .abstract_structure_extractor import AbstractStructureExtractor +from .concrete_structure_extractors.default_structure_extractor import DefaultStructureExtractor from .concrete_structure_extractors.abstract_law_structure_extractor import AbstractLawStructureExtractor +from .concrete_structure_extractors.article_structure_extractor import 
ArticleStructureExtractor from .concrete_structure_extractors.classifying_law_structure_extractor import ClassifyingLawStructureExtractor -from .concrete_structure_extractors.default_structure_extractor import DefaultStructureExtractor from .concrete_structure_extractors.diploma_structure_extractor import DiplomaStructureExtractor from .concrete_structure_extractors.foiv_law_structure_extractor import FoivLawStructureExtractor from .concrete_structure_extractors.law_structure_excractor import LawStructureExtractor from .concrete_structure_extractors.tz_structure_extractor import TzStructureExtractor from .structure_extractor_composition import StructureExtractorComposition -__all__ = ['AbstractStructureExtractor', 'AbstractLawStructureExtractor', 'ClassifyingLawStructureExtractor', 'DefaultStructureExtractor', - 'DiplomaStructureExtractor', 'FoivLawStructureExtractor', 'LawStructureExtractor', 'TzStructureExtractor', 'StructureExtractorComposition'] +__all__ = ['AbstractStructureExtractor', 'AbstractLawStructureExtractor', 'ArticleStructureExtractor', 'ClassifyingLawStructureExtractor', + 'DefaultStructureExtractor', 'DiplomaStructureExtractor', 'FoivLawStructureExtractor', 'LawStructureExtractor', 'TzStructureExtractor', + 'StructureExtractorComposition'] diff --git a/dedoc/structure_extractors/concrete_structure_extractors/article_structure_extractor.py b/dedoc/structure_extractors/concrete_structure_extractors/article_structure_extractor.py new file mode 100644 index 00000000..4ef6d4e8 --- /dev/null +++ b/dedoc/structure_extractors/concrete_structure_extractors/article_structure_extractor.py @@ -0,0 +1,35 @@ +from typing import List, Optional + +from dedoc.data_structures import HierarchyLevel, UnstructuredDocument +from dedoc.data_structures.line_with_meta import LineWithMeta +from dedoc.structure_extractors import AbstractStructureExtractor + + +class ArticleStructureExtractor(AbstractStructureExtractor): + """ + This class corresponds to the `GROBID `_ 
article structure extraction. + + This class saves all tag_hierarchy_levels received from the :class:`~dedoc.readers.ArticleReader` \ + without using the postprocessing step (without using regular expressions). + + You can find the description of this type of structure in the section :ref:`article_structure`. + """ + document_type = "article" + + def extract(self, document: UnstructuredDocument, parameters: Optional[dict] = None) -> UnstructuredDocument: + """ + Extract article structure from the given document and add additional information to the lines' metadata. + To get the information about the method's parameters look at the documentation of the class \ + :class:`~dedoc.structure_extractors.AbstractStructureExtractor`. + """ + for line in document.lines: + if line.metadata.tag_hierarchy_level is None: + line.metadata.tag_hierarchy_level = HierarchyLevel.create_raw_text() + else: + line.metadata.hierarchy_level = line.metadata.tag_hierarchy_level + assert line.metadata.hierarchy_level is not None + + return document + + def _postprocess(self, lines: List[LineWithMeta], paragraph_type: List[str], regexps: List, excluding_regexps: List) -> List[LineWithMeta]: + return lines diff --git a/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py b/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py index 9ce18e0e..da6e40cf 100644 --- a/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py +++ b/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py @@ -35,7 +35,7 @@ def extract(self, document: UnstructuredDocument, parameters: Optional[dict] = N line.metadata.tag_hierarchy_level = HierarchyLevel.create_unknown() if line.metadata.tag_hierarchy_level.line_type == HierarchyLevel.unknown: - line.metadata.hierarchy_level = self.get_list_hl_with_regexp(line, previous_line) + line.metadata.hierarchy_level = 
self.get_hl_list_using_regexp(line, previous_line) else: line.metadata.hierarchy_level = self.__get_hl_with_tag(line) @@ -61,7 +61,7 @@ def __get_hl_with_tag(self, line: LineWithMeta) -> HierarchyLevel: return line.metadata.tag_hierarchy_level @staticmethod - def get_list_hl_with_regexp(line: LineWithMeta, previous_line: Optional[LineWithMeta]) -> HierarchyLevel: + def get_hl_list_using_regexp(line: LineWithMeta, previous_line: Optional[LineWithMeta]) -> HierarchyLevel: prefix = get_prefix(DefaultStructureExtractor.prefix_list, line) # TODO dotted list without space after numbering, like "1.Some text" diff --git a/dedoc/structure_extractors/hierarchy_level_builders/diploma_builder/body_builder.py b/dedoc/structure_extractors/hierarchy_level_builders/diploma_builder/body_builder.py index 85f3006d..7f26fad1 100644 --- a/dedoc/structure_extractors/hierarchy_level_builders/diploma_builder/body_builder.py +++ b/dedoc/structure_extractors/hierarchy_level_builders/diploma_builder/body_builder.py @@ -44,7 +44,7 @@ def get_lines_with_hierarchy(self, lines_with_labels: List[Tuple[LineWithMeta, s elif prediction == "raw_text": line = self.__postprocess_raw_text(line, init_hl_depth) if not (line.metadata.hierarchy_level is not None and line.metadata.hierarchy_level.line_type == "named_item"): - line.metadata.hierarchy_level = DefaultStructureExtractor.get_list_hl_with_regexp(line, previous_raw_text_line) + line.metadata.hierarchy_level = DefaultStructureExtractor.get_hl_list_using_regexp(line, previous_raw_text_line) previous_raw_text_line = line else: line.metadata.hierarchy_level = HierarchyLevel.create_raw_text() diff --git a/dedoc/utils/parameter_utils.py b/dedoc/utils/parameter_utils.py index f7f0a090..ece1cf56 100644 --- a/dedoc/utils/parameter_utils.py +++ b/dedoc/utils/parameter_utils.py @@ -19,6 +19,13 @@ def get_param_language(parameters: Optional[dict]) -> str: return language +def get_param_document_type(parameters: Optional[dict]) -> str: + if parameters is None: 
+ return "other" + document_type = str(parameters.get("document_type", "other")).lower() + return document_type + + def get_param_orient_analysis_cells(parameters: Optional[dict]) -> bool: if parameters is None: return False @@ -26,6 +33,12 @@ def get_param_orient_analysis_cells(parameters: Optional[dict]) -> bool: return orient_analysis_cells +def get_param_with_attachments(parameters: Optional[dict]) -> bool: + if parameters is None: + return False + return str(parameters.get("with_attachments", "false")).lower() == "true" + + def get_param_need_header_footers_analysis(parameters: Optional[dict]) -> bool: if parameters is None: return False diff --git a/dedoc/utils/pdf_utils.py b/dedoc/utils/pdf_utils.py index 68bfa9a6..ba574dfd 100644 --- a/dedoc/utils/pdf_utils.py +++ b/dedoc/utils/pdf_utils.py @@ -1,15 +1,14 @@ from typing import Optional from PIL.Image import Image -from PyPDF2 import PdfFileReader from pdf2image import convert_from_path +from pypdf import PdfReader def get_pdf_page_count(path: str) -> Optional[int]: try: - with open(path, "rb") as fl: - reader = PdfFileReader(fl) - return reader.getNumPages() + reader = PdfReader(path) + return len(reader.pages) except Exception: return None diff --git a/docker-compose.yml b/docker-compose.yml index 904c36d8..85378db9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,10 +12,13 @@ services: - 1231:1231 environment: DOCREADER_PORT: 1231 + GROBID_HOST: "grobid" + GROBID_PORT: 8070 test: depends_on: + - grobid - dedoc build: context: . 
@@ -24,7 +27,14 @@ services: environment: DOC_READER_HOST: "dedoc" DOCREADER_PORT: 1231 + GROBID_HOST: "grobid" + GROBID_PORT: 8070 is_test: $test PYTHONPATH: $PYTHONPATH:/dedoc_root/tests:/dedoc_root command: bash dedoc_root/tests/run_tests_in_docker.sh + + grobid: + image: "lfoppiano/grobid:0.8.0" + ports: + - 8070:8070 diff --git a/docs/source/_static/code_examples/djvu_converter.py b/docs/source/_static/code_examples/djvu_converter.py index 192f889f..79415696 100644 --- a/docs/source/_static/code_examples/djvu_converter.py +++ b/docs/source/_static/code_examples/djvu_converter.py @@ -15,7 +15,7 @@ def can_convert(self, extension: Optional[str] = None, mime: Optional[str] = None, parameters: Optional[dict] = None) -> bool: - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + _, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension == ".djvu" def convert(self, file_path: str, parameters: Optional[dict] = None) -> str: diff --git a/docs/source/_static/code_examples/pdf_attachment_extractor.py b/docs/source/_static/code_examples/pdf_attachment_extractor.py index e28a7a2e..57d1739e 100644 --- a/docs/source/_static/code_examples/pdf_attachment_extractor.py +++ b/docs/source/_static/code_examples/pdf_attachment_extractor.py @@ -15,7 +15,7 @@ def can_extract(self, extension: Optional[str] = None, mime: Optional[str] = None, parameters: Optional[dict] = None) -> bool: - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension in recognized_extensions.pdf_like_format or mime in recognized_mimes.pdf_like_format def extract(self, file_path: str, parameters: Optional[dict] = None) -> List[AttachedFile]: diff --git a/docs/source/_static/code_examples/pdf_reader.py b/docs/source/_static/code_examples/pdf_reader.py index b588ae65..e6b01ef1 100644 --- 
a/docs/source/_static/code_examples/pdf_reader.py +++ b/docs/source/_static/code_examples/pdf_reader.py @@ -21,7 +21,7 @@ def __init__(self, config: Optional[dict] = None) -> None: self.attachment_extractor = PdfAttachmentsExtractor(config=self.config) def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, extension: Optional[str] = None, parameters: Optional[dict] = None) -> bool: - extension, mime = get_mime_extension(file_path=file_path, mime=mime, extension=extension) + mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension) return extension in recognized_extensions.pdf_like_format or mime in recognized_mimes.pdf_like_format def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument: diff --git a/docs/source/_static/json_format_examples/article_example.json b/docs/source/_static/json_format_examples/article_example.json new file mode 100644 index 00000000..712c5841 --- /dev/null +++ b/docs/source/_static/json_format_examples/article_example.json @@ -0,0 +1,8632 @@ +{ + "content": { + "structure": { + "node_id": "0", + "text": "Masking and Leakage-Resilient Primitives: One, the Other(s) or Both?", + "annotations": [], + "metadata": { + "paragraph_type": "root", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.0", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.0.0", + "text": "Sonia", + "annotations": [], + "metadata": { + "paragraph_type": "author_first_name", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.0.1", + "text": "Belaïd", + "annotations": [], + "metadata": { + "paragraph_type": "author_surname", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.0.2", + "text": "aff0", + 
"annotations": [], + "metadata": { + "paragraph_type": "author_affiliation", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.0.2.0", + "text": "École Normale Supérieure", + "annotations": [], + "metadata": { + "paragraph_type": "org_name", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.0.2.1", + "text": "\n45 rue dUlm\n75005\nParis\n", + "annotations": [], + "metadata": { + "paragraph_type": "address", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.0.3", + "text": "aff1", + "annotations": [], + "metadata": { + "paragraph_type": "author_affiliation", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.0.3.0", + "text": "Thales Communications & Security", + "annotations": [], + "metadata": { + "paragraph_type": "org_name", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.0.3.1", + "text": "\n4 Avenue des Louvresses\n92230\nGennevilliers\n", + "annotations": [], + "metadata": { + "paragraph_type": "address", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + } + ] + }, + { + "node_id": "0.1", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.1.0", + "text": "Vincent", + "annotations": [], + "metadata": { + "paragraph_type": "author_first_name", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.1.1", + "text": "Grosso", + "annotations": [], + "metadata": { + "paragraph_type": "author_surname", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.1.2", + "text": "aff2", + "annotations": [], + "metadata": { + "paragraph_type": 
"author_affiliation", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.1.2.0", + "text": "ICTEAM/ELEN/Crypto Group", + "annotations": [], + "metadata": { + "paragraph_type": "org_name", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.1.2.1", + "text": "\nBelgium\n", + "annotations": [], + "metadata": { + "paragraph_type": "address", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + } + ] + }, + { + "node_id": "0.2", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.2.0", + "text": "François", + "annotations": [], + "metadata": { + "paragraph_type": "author_first_name", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.2.1", + "text": "Xavier-Standaert", + "annotations": [], + "metadata": { + "paragraph_type": "author_surname", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.2.2", + "text": "aff2", + "annotations": [], + "metadata": { + "paragraph_type": "author_affiliation", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.2.2.0", + "text": "ICTEAM/ELEN/Crypto Group", + "annotations": [], + "metadata": { + "paragraph_type": "org_name", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.2.2.1", + "text": "\nBelgium\n", + "annotations": [], + "metadata": { + "paragraph_type": "address", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + } + ] + }, + { + "node_id": "0.3", + "text": "Abstract", + "annotations": [], + "metadata": { + "paragraph_type": "abstract", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + 
"node_id": "0.3.0", + "text": "Securing cryptographic implementations against side-channel attacks is one of the most important challenges in modern cryptography. Many countermeasures have been introduced for this purpose, and analyzed in specialized security models. Formal solutions have also been proposed to extend the guarantees of provable security to physically observable devices. Masking and leakage-resilient cryptography are probably the most investigated and best understood representatives of these two approaches. Unfortunately, claims whether one, the other or their combination provides better security at lower cost remained vague so far. In this paper, we provide the first comprehensive treatment of this important problem. For this purpose, we analyze whether cryptographic implementations can be security-bounded, in the sense that the time complexity of the best side-channel attack is lower-bounded, independent of the number of measurements performed. Doing so, we first put forward a significant difference between stateful primitives such as leakage-resilient PRGs (that easily ensure bounded security), and stateless ones such as leakage-resilient PRFs (that hardly do). We then show that in practice, leakage-resilience alone provides the best security vs. performance tradeoff when bounded security is achievable, while masking alone is the solution of choice otherwise. 
That is, we highlight that one (x)or the other approach should be privileged, which contradicts the usual intuition that physical security is best obtained by combining countermeasures.", + "annotations": [], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.4", + "text": "Introduction", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.4.0", + "text": "Masking is a frequently considered solution to improve security against sidechannel attacks [5,19]. A large number of papers investigated its application to smart card implementations of the AES (e.g. [17,37,53,55]). It essentially randomizes all the sensitive variables in a cryptographic device, by splitting them into d shares, and performs all computations on these shares afterwards. The resulting process is expected to improve physical security since if the masking scheme is carefully implemented (i.e. if the leakages of all the shares are independent), higher-order moments of the leakage distribution have to be estimated to reveal key-dependent information. It has been shown that the number of measurements needed to perform a successful DPA (Differential Power Analysis) increases exponentially with the number of shares (see, e.g. [44,59]).One limitation of masking is that (as most countermeasures against sidechannel attacks [30]) it \"only\" reduces the amount of information leakage, at the cost of sometimes strong performance overheads [20]. Another line of work, next denoted as leakage-resilient cryptography, followed a complementary approach and tried to make the exploitation of this information more difficult (e.g. computationally). For this purpose, the main assumption is that the information leakage per iteration is limited in some sense. 
When applied in the context of symmetric cryptography, most instances of leakage-resilient constructions rely on re-keying strategies for this purpose, as first suggested by Kocher [27]. Examples of primitives include Pseudo-Random Generators (PRGs) [12,15,41,57,58,64,65] and Pseudo-Random Functions (PRFs) [1,10,15,34,58,64].The topic of leakage resilience has given rise to quite animated debates in the cryptographic community. Several assumptions have been proposed, and the quest for models that adequately capture physical reality is still ongoing (see [57] for a recent discussion). Yet, and independent of the relevance of the proofs obtained within these models, a more pragmatic problem is to find out the security levels of leakage-resilient constructions in front of standard side-channel adversaries (i.e. the same as the ones considered in security evaluations for masking). That is, are these primitives useful to help cryptographic designers to pass current certification procedures (e.g. EMVco [14] or Common Criteria [7])?Unfortunately, claims in one or the other direction remained vague so far. The main reason is that, as hinted by Bernstein in a CHES 2012 rump session talk, substantiated answers require to consider both security and performances [3], i.e. two qualities that are generally hard to quantify. In this paper, we aim to contribute to this issue and provide tools allowing to determine the best way to reach a given security level in different (software and hardware) scenarios, within the limits of what empirical evaluations can provide. For this purpose, we will consider the AES-based PRG and PRF illustrated in Figures 1 and2, respectively. For every key k i , the PRG produces a key k i+1 and N -1 strings y i 1 , y i 2 , . . . , y i N -1 , both obtained by encrypting N public plaintexts p i j with k i . 
As for the PRF, we use the tree-based construction from Goldreich, Goldwasser and Micali [18], where each step incorporates log 2 [N ] input bits and generates k i+1 = AES ki (p i j ). Following [34], the last stage is optionally completed by a whitening step, in order to limit the data complexity of attacks targeting the PRF output to one (e.g. when using large N values, typically). Quite naturally, there is a simple security versus efficiency tradeoff for both types of constructions. In the first (PRG) case, we produce a 128-bit output stream every N N -1 AES encryptions. In the second (PRF) case, we produce a 128-bit output every 128 log(N ) AES encryptions (+1 if output whitening is used). The details of these primitives are not necessary for the understanding of this work. The only important feature in our discussions is that the PRG construction is stateful while the PRF one is stateless. As a result, the PRG limits the number of measurements that a side-channel adversary can perform with the same key, while the PRF limits his data complexity (i.e. the number of plaintexts that can be observed). In practice, it means that in this latter case, the same measurement can be repeated multiple times, e.g. in order to get rid of the physical noise through averaging. As already discussed by Medwed et al. in [34], Section 3, this may lead to significant difference in terms of security against DPA.In order to compare and combine these two primitives with masking, we investigate whether they can lead to security-bounded implementations, i.e. implementations such that the time complexity of the best side-channel attack remains bounded independent of the number of measurements performed by the adversary. Doing so, we first show that the stateful leakage-resilient PRG in Figure 1 naturally leads to such implementations. By contrast, this guarantee is harder to reach with (stateless) leakage-resilient PRFs such as in Figure 2. 
The tweaked construction proposed in [34] (that takes advantage of hardware parallelism) is in fact the only security-bounded PRF we found in our experiments. Next, we put forward that better security at lower cost is obtained by using the leakage-resilient PRG alone (i.e. without masking), while masking alone is the most efficient solution for improving the security of stateless primitives whenever the implementations cannot be security-bounded. Therefore, our results underline that both masking and leakage-resilient primitives can be useful ingredients in the design of physically secure designs. But they also lead to the counterintuitive observation that sometimes (in fact, frequently), these solutions are better used separately, hence contradicting the usual intuition that security against side-channel attacks is best obtained via a combination of countermeasures.Admittedly, these results are only obtained for a set of side-channel attacks that are representative of the state-of-the-art. Hence, positive observations such as made for the tweaked construction in [34] are not proven: they only indicate that the cryptanalysis of such schemes may be hard with current knowledge. In the same lines, the differences between leakage-resilient PRGs and PRFs do not contradict their proofs: they only indicate that the (crucial) assumption of bounded leakage can imply different challenges for hardware designers. 
Hence, instantiating these primitives with the same AES implementation can lead to different security levels (even if the same N value is used in both cases).", + "annotations": [ + { + "start": 92, + "end": 95, + "name": "bibliography_ref", + "value": "bac4e44c-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 95, + "end": 98, + "name": "bibliography_ref", + "value": "bac4e4bb-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 201, + "end": 205, + "name": "bibliography_ref", + "value": "bac4e4ab-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 205, + "end": 208, + "name": "bibliography_ref", + "value": "bac4e551-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 208, + "end": 211, + "name": "bibliography_ref", + "value": "bac4e5cd-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 211, + "end": 214, + "name": "bibliography_ref", + "value": "bac4e5dd-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 846, + "end": 850, + "name": "bibliography_ref", + "value": "bac4e584-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 850, + "end": 853, + "name": "bibliography_ref", + "value": "bac4e602-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 942, + "end": 946, + "name": "bibliography_ref", + "value": "bac4e516-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1055, + "end": 1059, + "name": "bibliography_ref", + "value": "bac4e4c5-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1550, + "end": 1554, + "name": "bibliography_ref", + "value": "bac4e501-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1619, + "end": 1623, + "name": "bibliography_ref", + "value": "bac4e480-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1623, + "end": 1626, + "name": "bibliography_ref", + "value": "bac4e49b-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1683, + "end": 1686, + "name": "bibliography_ref", + "value": "bac4e49b-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1626, + "end": 1629, + "name": "bibliography_ref", + "value": "bac4e571-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1629, + 
"end": 1632, + "name": "bibliography_ref", + "value": "bac4e5ec-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1929, + "end": 1933, + "name": "bibliography_ref", + "value": "bac4e5ec-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1632, + "end": 1635, + "name": "bibliography_ref", + "value": "bac4e5f6-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1689, + "end": 1692, + "name": "bibliography_ref", + "value": "bac4e5f6-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1635, + "end": 1638, + "name": "bibliography_ref", + "value": "bac4e634-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1692, + "end": 1695, + "name": "bibliography_ref", + "value": "bac4e634-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1638, + "end": 1641, + "name": "bibliography_ref", + "value": "bac4e63d-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1677, + "end": 1680, + "name": "bibliography_ref", + "value": "bac4e42a-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1680, + "end": 1683, + "name": "bibliography_ref", + "value": "bac4e46d-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1686, + "end": 1689, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3412, + "end": 3416, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 4544, + "end": 4548, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 5206, + "end": 5210, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 6249, + "end": 6253, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2381, + "end": 2385, + "name": "bibliography_ref", + "value": "bac4e499-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2405, + "end": 2408, + "name": "bibliography_ref", + "value": "bac4e461-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2640, + "end": 2643, + "name": "bibliography_ref", + "value": 
"bac4e43e-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3306, + "end": 3310, + "name": "bibliography_ref", + "value": "bac4e4b2-f290-11ee-a6ed-b88584b4e4a1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.5", + "text": "Methodology & limitations", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.5.0", + "text": "The main goal of this paper is to provide sound techniques to evaluate how leakage-resilient PRGs/PRFs and masking combine. In this section, we provide a brief description of the methodology we will use for this purpose, and underline its limitations. The two main components, namely performance and security evaluations, are detailed in Sections 3 and 4, and then combined in Section 5. Our proposal essentially holds in five steps that we detail below.1. Fix the target security level. In the following, we will take the AES Rijndael with 128-bit key as case study. Since a small security degradation due to side-channel attacks is unavoidable, we will consider 120-bit, 100-bit and 80-bit target security levels for illustration. We do not go below 80-bit keys since it typically corresponds to current short-term security levels [9].2. Choose an implementation. Given a cryptographic algorithm, this essentially corresponds to the selection of a technology and possibly a set of countermeasures to incorporate in the designs to evaluate. In the following, we will consider both software and hardware implementations for illustration, since they lead to significantly different performance and security levels. As for countermeasures, different types of masking schemes will be considered.3. Evaluate performances / extract a cost function. 
Given an implementation, different metrics can be selected for this purpose (such as code size, RAM, or cycle count in software and area, frequency, throughput or power consumption in hardware). Both for software and hardware implementations, we will use combined functions, namely the \"code size × cycle count\" product and the \"area / throughput\" ratio. While our methodology would be perfectly applicable to other choices of metrics, we believe they are an interesting starting point to capture the efficiency of our different implementations. In particular for the hardware cases, such metrics are less dependent on the serial vs. parallel nature of the target architectures (see [26], Section 2).4. Evaluate security / extract the maximum number of measurements. This central part of our analysis first requires to select the attacks from which we will evaluate security. In the following, we will consider the \"standard DPA attacks\" described in [31] for this purpose. Furthermore, we will investigate them in the profiled setting of template attacks (i.e. assuming that the adversary can build a precise model for the leakage function) [6]. This choice is motivated by the goal of approaching worst-case evaluations [56]. Based on these attacks, we will estimate the security graphs introduced in [61], i.e. compute the adversaries' success rates in function of their time complexity and number of measurements. From a given security level (e.g. 
120-bit time complexity), we will finally extract the maximum number of measurements per key tolerated, as can be bounded by the PRG construction1 .", + "annotations": [ + { + "start": 833, + "end": 836, + "name": "bibliography_ref", + "value": "bac4e46b-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2027, + "end": 2031, + "name": "bibliography_ref", + "value": "bac4e4f7-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2295, + "end": 2299, + "name": "bibliography_ref", + "value": "bac4e51d-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2486, + "end": 2489, + "name": "bibliography_ref", + "value": "bac4e456-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2566, + "end": 2570, + "name": "bibliography_ref", + "value": "bac4e5e6-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2647, + "end": 2651, + "name": "bibliography_ref", + "value": "bac4e61b-f290-11ee-a6ed-b88584b4e4a1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.6", + "text": "5.", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.6.0", + "text": "Compute a global cost metric (possibly with an application constraint). In case of security-bounded implementations, the previous security evaluation can be used to estimate how frequently one has to \"re-key\" within a leakageresilient construction. From this estimate, we derive the average number of AES encryptions to execute per 128-bit output. By multiplying this number with the cost function of our performance evaluations, we obtain a global metric for the implementation of an AES-based design ensuring a given security level. In case of security-unbounded implementations, re-keying is not sufficient to maintain the target security level independent of the number of measurements performed by the adversary. 
So the cost functions have to be combined with an application constraint, stating the maximum number of measurements that can be tolerated to maintain this security level.Quite naturally, such a methodology is limited in the same way as any performance and security evaluation. From the performance point-of-view, our investigations only apply to a representative subset of the (large) set of AES designs published in the literature. Because of place constraints, we first paid attention to state-of-the-art implementations and countermeasures, but applying our methodology to more examples is naturally feasible (and desirable). A very similar statement holds for security evaluations. Namely, we considered standard DPA attacks as a starting point, and because they typically correspond to the state-of-the-art in research and evaluation laboratories. Yet, cryptanalytic progresses can always appear2 . Besides, countermeasures such as masking may rely on physical assumptions that are difficult to compare rigorously (since highly technology-dependent), as will be detailed next with the case of \"glitches\".Note that these limitations are to a large extent inherent to the problem we tackle, and our results also correspond to the best we can hope in this respect. Hence, more than the practical conclusions that we draw in the following sections (that are of course important for current engineers willing to implement physically secure designs), it is the fact that we are able to compare the performance vs. security tradeoffs corresponding to the combination of leakage-resilient constructions with masking that is the most important contribution of this work. 
Indeed, these comparisons are dependent on the state-of-the-art implementations and attacks that are considered to be relevant for the selected algorithm.", + "annotations": [], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.7", + "text": "Performance evaluations", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.7.0", + "text": "In this section, we provide our performance evaluations for unprotected and masked AES designs. As previously mentioned, we will consider both software and hardware examples for this purpose. In this context, the main challenge is to find implementations that are (reasonably) comparable. This turned out to be relatively easy in the software case, for which we selected a couple of implementations in 8-bit microcontrollers, i.e. typical targets for side-channel analysis. By contrast, finding implementations in the same technology turns out to be more challenging in hardware: transistor sizes have evolved from (more than) 130µm to (less than) 65ηm over the last 15 years (i.e. the period over which most countermeasures against side-channel attacks have been proposed). Hence, published performance evaluations for side-channel protected designs are rarely comparable. Yet, we could find several designs in a recent FPGA technology, namely the Xilinx Virtex-5 devices (that are based on a 65ηm process).The performances of the implementations we will analyze are summarized in Table 1. As previously mentioned, our software cost function is the frequently considered \"code size × cycle count\" metric, while we use the \"area / throughput\" ratio in the hardware (FPGA) case. 
As for the countermeasures evaluated, we first focused on the higher-order masking scheme proposed by Rivain and Prouff at CHES 2010, which can be considered as the state-of-the-art in software [53]. We then added the CHES 2011 polynomial masking scheme of Prouff and Roche [45] (and its implementation in [20]), as a typical example of \"glitchresistant\" solution relying on secret sharing and multiparty computation (see the discussion in the next paragraph). A similar variety of countermeasures is proposed in hardware, where we also consider an efficient but glitch-sensitive implementation proposed in [48], and a threshold AES implementation that is one of the most promising solutions to deal with glitches in this case [36]. Note that this latter implementation is based on an 8-bit architecture (rather than a 128-bit one for the others). So although our cost function is aimed at making comparisons between different architectures more reflective of the algorithms' and countermeasures' performances, more serial implementations as this one generally pay a small overhead due to their more complex control logic.Physical assumptions and glitches. As explicit in Table 1, countermeasures against side-channel attacks always rely on a number of physical assumptions.In the case of masking, a central one is that the leakage of the shares manipulated by the target implementation should be independent of each other [22]. Glitches, that are transient signals appearing during the computations in certain (e.g. CMOS) implementations, are a typical physical default that can cause this assumption to fail, as first put forward by Mangard et al. in [32]. 
There are two possible solutions to deal with such physical defaults: either by making explicit to cryptographic engineers that they have to prevent glitches at the physical level, or by designing countermeasures that can cope with glitches.Interestingly, the first solution is one aspect where hardware and software implementations significantly differ. Namely, while it is usually possible to ensure independent leakages in masked software, by ensuring a sufficient time separation between the manipulation of the shares, it is extremely difficult to avoid glitches in hardware [33]. Yet, even in hardware it is generally expected that the \"glitch signal\" will be more difficult to exploit by adversaries, especially if designers pay attention to this issue [35]. In this context, the main question is to determine the amplitude of this signal: if sufficiently reduced in front of the measurement noise, it may turn out that a glitch-sensitive masked implementation leads to improved security levels (compared to an unprotected one). Since this amplitude is highly technology-dependent, we will use it as a parameter to analyze the security of our hardware implementations in the next sections. Yet, we recall that it is a safe practice to focus on glitch-resistant implementations when it comes to hardware. 
Besides, we note that glitches are not the only physical default that may cause the independent leakage assumption to be contradicted in practice [42,51].", + "annotations": [ + { + "start": 1088, + "end": 1089, + "name": "table", + "value": "d2ce350a-25be-4d05-9061-6f1d4cf8bdd1" + }, + { + "start": 2456, + "end": 2457, + "name": "table", + "value": "d2ce350a-25be-4d05-9061-6f1d4cf8bdd1" + }, + { + "start": 1472, + "end": 1476, + "name": "bibliography_ref", + "value": "bac4e5cd-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1552, + "end": 1556, + "name": "bibliography_ref", + "value": "bac4e58b-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1584, + "end": 1588, + "name": "bibliography_ref", + "value": "bac4e4c5-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1885, + "end": 1889, + "name": "bibliography_ref", + "value": "bac4e5a0-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2005, + "end": 2009, + "name": "bibliography_ref", + "value": "bac4e549-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2701, + "end": 2705, + "name": "bibliography_ref", + "value": "bac4e4d6-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2931, + "end": 2935, + "name": "bibliography_ref", + "value": "bac4e526-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3517, + "end": 3521, + "name": "bibliography_ref", + "value": "bac4e531-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3697, + "end": 3701, + "name": "bibliography_ref", + "value": "bac4e541-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 4394, + "end": 4398, + "name": "bibliography_ref", + "value": "bac4e575-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 4398, + "end": 4401, + "name": "bibliography_ref", + "value": "bac4e5bc-f290-11ee-a6ed-b88584b4e4a1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.8", + "text": "Security evaluations", + "annotations": [], + "metadata": { + "paragraph_type": "section", + 
"page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.8.0", + "text": "We now move to the core of our analysis, namely the security evaluation of different implementations. For this purpose, we first need to discuss the type of security evaluation we will conduct, which can be viewed as a tradeoff between generality and informativeness. That is, one ideally wants to reach general conclusions in the sense that they are independent of the underlying device technology. A typical solution for this purpose is to evaluate the \"security order\" of a countermeasure, as defined by Coron et al. [8]. Informally, the security order corresponds to the largest moment in the leakage probability distributions that is key-independent (hence from which no information can be extracted). For example, an unprotected implementation can be attacked by computing mean values (i.e. first-order moments) [28]. By contrast, the hope of masking is to ensure that adversaries will have to estimate higher-order moments, which is expected to increase the data complexity required to extract information, as first shown by Chari et al. [5]. Evaluating the order is interesting because under the independent leakage assumption mentioned in the last section, it can be done based on the mathematical description of a countermeasure only. Of course, the informativeness of such an abstract evaluation is limited since (1) it indeed does not allow testing whether the independent leakage assumption is fulfilled, and (2) even if this assumption is fulfilled, there is no strict correspondance between the security order and the security level of an implementation (e.g. measured with a probability of success corresponding to some bounded complexities). This is because already for masking (i.e. 
the countermeasure that aims at increasing the security order), and even if independent leakages are observed in practice, the actual complexity of a side-channel attack highly depends on the amount of noise in the measurements. And of course, there are also countermeasures that simply do not aim at increasing the security order, e.g. shuffling [21].One appealing way to mitigate the second issue is to perform so-called \"simulated attacks\". This essentially requires to model the leakage corresponding to different sensitive operations in an idealized implementation. For example, a usual approximation is to consider that all the intermediate values during a cryptographic computation (such as the S-boxes inputs and outputs for a block cipher) leak the sum of their Hamming weight and a Gaussian distributed noise [30]. It is then possible to accurately estimate the evaluation metrics proposed in [56] (i.e. mutual information, success rate, guessing entropy) from these mathematically generated leakages. Furthermore, one can use the noise variance as a security parameter and analyze its impact on the time and data complexity of successful attacks. Quite naturally, such an alternative still does not solve the first issue (i.e. the independent leakage assumption), for which the only possibility is to evaluate the real measurements of an actual implementation, in a given technology. This latter solution is admittedly the most informative, but also the least general, and is quite intensive for comparison purposes (since it requires to have access to source codes, target devices and measurement setups for all the designs to evaluate). Interestingly, it has been shown that simulated attacks can be quite close to real ones in the context of standard DPA and masking [59]. 
So since our goal is to show that there exist realistic scenarios where leakage-resilient PRGs/PRFs and masking are useful ingredients to reach a given security level at the lowest cost, we will use this type of evaluations in the following.Note finally that performing simulated attacks could not be replaced by computing explicit formulae for the success rate such as, e.g. [16,52]. Indeed, these formulae only predict subkey (typically key bytes) recoveries while we consider security graphs for full 128-bit master keys. Beside, they are only applicable to unprotected devices so far, and hardly capture masked implementations and the effect of key-dependent algorithmic noise as we will consider next.", + "annotations": [ + { + "start": 520, + "end": 523, + "name": "bibliography_ref", + "value": "bac4e463-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 818, + "end": 822, + "name": "bibliography_ref", + "value": "bac4e505-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1045, + "end": 1048, + "name": "bibliography_ref", + "value": "bac4e44c-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2048, + "end": 2052, + "name": "bibliography_ref", + "value": "bac4e4cd-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2520, + "end": 2524, + "name": "bibliography_ref", + "value": "bac4e516-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2604, + "end": 2608, + "name": "bibliography_ref", + "value": "bac4e5e6-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3482, + "end": 3486, + "name": "bibliography_ref", + "value": "bac4e602-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3864, + "end": 3868, + "name": "bibliography_ref", + "value": "bac4e4a3-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3868, + "end": 3871, + "name": "bibliography_ref", + "value": "bac4e5c4-f290-11ee-a6ed-b88584b4e4a1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.9", + "text": 
"Evaluation setups", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.9.0", + "text": "We will consider two types of setups in our evaluations: one for software, one for hardware. As illustrated in Figure 3 in the case of a Boolean-masked S-box implementation with two shares, the main difference is that the software performs all the operations sequentially, while the hardware performs them in parallel. We will further assume that the leakage of parallel operations is summed [40]. As previously mentioned, we will illustrate our analyses with a Hamming weight leakage function. Additionally, we will consider a noise variance of 10, corresponding to a Signal-to-Noise Ratio of 0.2 (as defined in [29]) 3 . This is a typical value, both for software implementations [11] and FPGA measurement boards [25].Let us denote the AES S-box as S, a byte of plaintext and key as x i and k i (respectively), the random shares used in masking as r j i (before the S-box) and m j i (after the S-box), the Hamming weight function as HW, the bitwise XOR as ⊕, the field multiplication used in polynomial masking as ⊗, and Gaussiandistributed noise random variables N j i . From these notations, we can specify the list of all our target implementations as summarized in Table 2.A couple of observations are worth being underlined as we now discuss.First, and as already mentioned, the main difference between software and hardware implementations is the number of exploitable leakage samples: there is a single such sample per plaintext in hardware while there are 16×(N m +1) ones in software (with N m the number of masks). Next, we only considered glitches in hardware (since it is generally possible to ensure independent leakage in software, by ensuring a sufficient time separation between the manipulation of the shares). 
We assumed that \"first-order glitches\" can appear in our Boolean-masked FPGA implementation, and modeled the impact of the mask as an additive binomial noise in this case. We further assumed that the amplitude of this first-order signal was reduced according to a factor f . This factor corresponds to the parameter used to quantify the amplitude of the glitches mentioned in the previous section. Note that this modeling is sound because the complexity of a first-order DPA only depends on the value of its SNR (which is equivalent to correlation and information theoretic metrics in this case, as proven in [31]). So even leakage functions deviating from the Hamming weight abstraction would lead to similar trends. Since the threshold implementation in [36] guarantees the absence of firstorder glitches, we only analyzed the possibility of second-order glitches for this one, and modeled them in the same way as just described (i.e. by considering the second mask M 2 i as an additive binomial noise, and reducing the amplitude of the second-order signal by a factor f ). Third, the chosen-plaintext construction of [34] is only applicable in hardware. Furthermore, we only evaluated its impact for the unprotected implementation, and the 1-mask Boolean one with glitches. As will become clear in the next section, this is because the data complexity bound to 256 (that is the maximum tolerated by design in this case) is only relevant when successful side-channel attacks occur for such small complexities (which was only observed for implementations with first-order signal).For convenience, we denoted each implementation in our experiments with three letters. The first one corresponds to the type of scenario considered, i.e. with Known (K) or carefully Chosen (C) plaintexts. The second one indicates [20,45]2nd-order KP whether we are in a Software (S) or Hardware (H) case study. The third one corresponds to the type of countermeasure selected, i.e. 
Unprotected (U), 1-or 2-mask Boolean (B 1 , B 2 ), 1-mask Polynomial (P 1 ) and 2-mask threshold (T 2 ). The additional star signals finally reflect the presence of (first-order or secondorder) glitches. For example, KHB * 1 is an AES design protected with a 1-mask Boolean scheme, implemented in an imperfect hardware leading to first-order glitches, and analyzed in the context of known (uniform) plaintexts.", + "annotations": [ + { + "start": 392, + "end": 396, + "name": "bibliography_ref", + "value": "bac4e568-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 613, + "end": 617, + "name": "bibliography_ref", + "value": "bac4e50e-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 682, + "end": 686, + "name": "bibliography_ref", + "value": "bac4e476-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 715, + "end": 719, + "name": "bibliography_ref", + "value": "bac4e4ee-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2339, + "end": 2343, + "name": "bibliography_ref", + "value": "bac4e51d-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2486, + "end": 2490, + "name": "bibliography_ref", + "value": "bac4e549-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2850, + "end": 2854, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3542, + "end": 3546, + "name": "bibliography_ref", + "value": "bac4e4c5-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 3546, + "end": 3548, + "name": "bibliography_ref", + "value": "bac4e58b-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1177, + "end": 1178, + "name": "table", + "value": "6e093372-d147-4245-8aab-08ed5fe5c072" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.10", + "text": "Template attacks and security graphs", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + 
"node_id": "0.10.0", + "text": "Given the leakage functions defined in Table 2, a template attack first requires to build a leakage model. In the following, and for each byte of the AES master key, we will consider Gaussian templates for unprotected implementations, and Gaussian for masked implementations. Let us denote the probability density function of a Gaussian distribution taken on input z, with mean µ (resp. mean vector µ) and variance σ 2 (resp. covariance matrix Σ) as N (z|µ, σ 2 ) (resp. N (z|µ, Σ)). This notation directly leads to models of the form:Prfor (software and hardware) unprotected implementations and:Prfor (software and hardware) masked implementations with two shares. The formula naturally extends to more shares, by just adding more sums over the masks. Note that in these models, all the noise (including the algorithmic one in hardware implementations) is captured by the Gaussian distribution 4 . Given these models, the template adversary will accumulate information on the key bytes k i , by computing products of probabilities corresponding to multiple plaintexts. Doing so and for each key byte, he will produce lists of 256 probabilities corresponding each possible candidate ki , defined as follows:i ],with the leakage vector L (j) respectively corresponding to l (j) i (resp. l (j) ) in the context of Equ. 1 (resp. Equ. 2) and l 1,(j) i , l 2,(j) i (resp. l (j) ) in the context of Equ. 3 (resp. Equ. 4) The number of measurements is given by q in Equ. 5. Next and for each target implementation, we will repeat 100 experiments. And for each value of q in these experiments, use a rank estimation algorithm to evaluate the time complexity needed to recover the full AES master key [61]. Eventually, we will build \"security graphs\" where the attack probability of success is provided in function of a time complexity and a number of measurements.Iterative DPA against constructions with carefully chosen plaintexts. 
Note that while standard DPA attacks are adequate to analyze the security of unprotected and masked implementations in a known-plaintext scenario, their divide-and-conquer strategy hardly applies to the PRF in [34], with carefullychosen plaintexts leading to key-dependent algorithmic noise. This is because the (maximum 256) constants c j used in this proposal are such that all 16 bytes are always identical. Hence, a standard DPA will provide a single list of probabilities, containing information about the 16 AES key bytes at once. In this case, we additionally considered the iterative DPA described in this previous reference, which essentially works by successively removing the algorithmic noise generated by the best-rated key bytes. While such an attack can only work under the assumption that the adversary has an very precise leakage model in hand, we use it as a representative of worst-case attack against such a construction.", + "annotations": [ + { + "start": 45, + "end": 46, + "name": "table", + "value": "6e093372-d147-4245-8aab-08ed5fe5c072" + }, + { + "start": 1693, + "end": 1697, + "name": "bibliography_ref", + "value": "bac4e61b-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2137, + "end": 2141, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.11", + "text": "Experimental results", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.11.0", + "text": "For illustration, the security graph of the AES implementation KHB 1 is given in Figure 4, where we additionally provide the maximum number of measurements tolerated to maintain security levels corresponding to 2 120 , 2 100 and 2 80 time complexity. 
All the implementations in Table 2 have been similarly evaluated and the result of these experiments are in Appendix A, Figures 8 to 13. Note that in the aforementioned case of iterative DPA (Appendix A, Figure 14), the adversary recovers the AES key bytes but still has to find their position within the AES state, which (roughly) corresponds to 16! ≈ 2 44 possibilities [2].", + "annotations": [ + { + "start": 284, + "end": 285, + "name": "table", + "value": "6e093372-d147-4245-8aab-08ed5fe5c072" + }, + { + "start": 623, + "end": 626, + "name": "bibliography_ref", + "value": "bac4e432-f290-11ee-a6ed-b88584b4e4a1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.12", + "text": "Security vs. performance tradeoffs", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.12.0", + "text": "We now combine the results in the previous sections to answer our main question. Namely, what is the best way to exploit masking and/or leakage-resilient primitives to resist standard DPA in hardware and software implementations?", + "annotations": [], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.13", + "text": "Leakage-resilient PRGs", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.13.0", + "text": "Let M be the maximum number of measurements tolerated to maintain a given security level for one of the implementations in section 4. The re-keying in leakage-resilient PRGs is such that it is exactly this number M that is limited by design (i.e. 
the value N in Figure 1 bounds M for the adversary), hence directly leading to security-bounded implementations. The global cost metric we use in this case can be written as M M -1 × cost f unction, where the first factor corresponds to the average number of AES encryptions that are used to produce each 128-bit output string, and the second one is the cost function of Table 1.A comparison of different leakage-resilient PRG implementations in software (i.e. based on different unprotected and protected AES implementations) is given in Figure 5 for 80-bit and 120-bit security levels (the results for 100-bit security are in Appendix A, Figure 15, left). The main observation in this context is that the straightforward implementation of the PRG with an unprotected AES design is the most efficient solution. This is mainly because moving from the smallest M value (i.e. M = 2, as imposed by the 120-bit security level in the unprotected case -see Figure 8-left) to large ones (e.g. M > 1000 for masked implementations) can only lead to a gain factor of 2 for the global cost metric, which is not justified in view of the performance overheads due to the masking. For a similar reason (i.e. the limited interest of increasing M ), the global cost metric is essentially independent of the target security level in the figure. In other words, there is little interest in decreasing this security level since it leads to poor performance improvements. 
The hardware implementations in Appendix A, Figures 15-right and 16 lead to essentially similar intuitions, as also witnessed by the limited impact of decreasing the amplitude of the glitch signal with the f factor (see the KHB * 1 and KHT * 2 implementations for which f = 10 in the latter figures).", + "annotations": [ + { + "start": 624, + "end": 625, + "name": "table", + "value": "d2ce350a-25be-4d05-9061-6f1d4cf8bdd1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.14", + "text": "Leakage-resilient PRFs", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.14.0", + "text": "Security-unbounded implementations. Let us now consider (stateless) leakage-resilient PRFs. As already mentioned, those constructions only bound the adversary's data complexity. The main observation in this case is that if random plaintexts are considered, such implementations can only be security-unbounded (with the slight cautionary note that we give below). This fact can be easily explained when the PRF is instantiated with an unprotected software implementation of the AES. What happens then is that the adversary can repeat his measurements to get rid of the physical noise, and consequently move from the security graph of Appendix A, Figure 8-left to the one of Appendix A, Figure 13-right. Such a \"repeating\" attack is exactly the one already mentioned in [34] to argue that bounded data complexity is not enough to bound (computational) security. In fact, it similarly applies to masked implementations. The only difference is that the adversary will not average his measurements, but rather combine them as in Equation 5. This is because given a leakage function, e.g. 
the Hamming weight one that leads to 9 distinguishable events, the distribution of the measurements in a masked implementation will lead to the same number of distinguishable events: the only difference is that more sampling will be necessary to distinguish them (see the appendices in [60] for a plot of these distributions). So if the number of measurements is not bounded, attacks with low time complexities as in Appendix A, Figure 13 right will always exist.One important consequence is that using the PRF construction in this context is essentially useless for all the AES implementations we consider in this paper. The only way to maintain a target security level for such stateless primitives is to limit the number of measurements by putting a constraint on the lifetime of the system. And this lifetime will be selected according to the maximum number of measurements tolerated that can be extracted from our security graphs, which now highly depends on the countermeasure selected. In other words, we can only evaluate the cost function and the security level attained independently in this case, as illustrated in Figure 6 for our software instances (the 100-bit security level is again given in Appendix A, Figure 17-left). Here, we naturally come back to the standard result that Boolean (resp. polynomial) masking increases security at the cost of performance overheads that are roughly quadratic (resp. cubic) in the number of shares. Note that the security level of the 1-mask polynomial scheme is higher than the 2-mask Boolean one for the noise variance we consider, which is consistent with the previous work of Roche and Prouff [54]. Similar conclusions are obtained with hardware implementations (Appendix A, Figure 17-right and Appendix A, Figure 18), for which the impact of glitches is now clearly visible. 
For example, a factor f = 10 essentially multiplies the number of measurements by f for the Boolean masking with first-order glitches, and f 2 for the threshold implementation with second-order glitches. Cautionary note. The statement that stateless leakage-resilient PRFs can only be security unbounded if known plaintexts are considered essentially relates to the fact that repeated measurements allow removing the effect of the noise and the masks in a leaking implementation. Yet, this claim should be slightly mitigated in the case of algorithmic noise in hardware implementations. Indeed, this part of the noise can only be averaged up to the data complexity bound that is imposed by the PRF design. Taking the example of our hardware implementations where all 16 S-boxes are manipulated in parallel, the SNR corresponding to algorithmic noise can be computed as the ratio between the variance of a uniformly distributed 8-bit values's Hamming weight (i.e. 2) and the variance of 15 such values (i.e. 30). Averaging this noise over M plaintexts will lead to SNRs of 1 15/M , which is already larger than 17 if M = 256 (i.e. a noise level for which the security graph will be extremely close to the worst case one of Appendix A, Figure 13-right). So although there is a \"gray area\" where a leakage-resilient PRF implemented in hardware can be (weakly) security-bounded, these contexts are of quite limited interest because the will imply bounds on the data complexity that are below 256, i.e. they anyway lead to less efficient solutions than the tweaked construction that we investigate in the next subsection.Security-bounded implementations. As just discussed, stateless primitives hardly lead to security bounded implementations if physical and algorithmic noise can be averaged -which is straightforwardly feasible in a known plaintext scenario. 
The tweaked construction in [34] aims at avoiding such a weakness by preventing the averaging of the algorithmic noise, thanks to the combined effect of hardware parallelism and carefully chosen plaintexts leading to keydependencies in this noise. Since only the physical noise can be averaged in this case, the bounded data complexity that the leakage-resilient PRF guarantees consequently leads to security-bounded implementations again. This is illustrated both by the standard DPAs (such as in Appendix A, Figures 10-right and 12-left) and the iterative attacks (such as in Appendix A, Figure 13) that can be performed against this PRF 5 . As in Section 5.1, we extracted the maximum data complexity D from these graphs, and produced as global cost metric:where the first factor corresponds to the (rounded) average number of AES encryptions needed to produce a 128-bit output, and the second one is the cost function of Table 1. A comparison of our different leakage-resilient PRFs instantiated with a hardware implementation of the AES and chosen plaintexts is given in Figure 7. Here again, we observe that the most efficient solution is to consider an unprotected design. Interestingly, we also observe that for the unprotected AES, the iterative attack is the worst case for the 80-bit security level (where it forces the re-keying after 97 plaintexts vs. 256 for the standard DPA), while the standard DPA is the worst-case for the 120-bit security level (where it forces the re-keying after 10 plaintexts vs. 37 for the iterative attack). This nicely fits the intuition that iterative attacks become more powerful as the data complexity increases, i.e. when the additional time complexity corresponding to the enumeration of a permutation over 16 bytes becomes small compared to the time complexity required to recover the 16 AES key bytes (unordered). 
", + "annotations": [ + { + "start": 768, + "end": 772, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 4800, + "end": 4804, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1369, + "end": 1373, + "name": "bibliography_ref", + "value": "bac4e610-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 2732, + "end": 2736, + "name": "bibliography_ref", + "value": "bac4e5d7-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 5703, + "end": 5704, + "name": "table", + "value": "d2ce350a-25be-4d05-9061-6f1d4cf8bdd1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.15", + "text": "Conclusion", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.15.0", + "text": "The results in this work essentially show that masking and leakage-resilient constructions hardly combine constructively. For (stateful) PRGs, our experiments indicate that both for software and hardware implementations, a leakageresilient design instantiated with an unprotected AES is the most efficient solution to reach any given security level. For stateless PRFs, they rather show that a bounded data complexity guarantee is (mostly) ineffective in bounding the (computational) complexity of the best attacks. So implementing masking and limiting the lifetime of the cryptographic implementation is the best solution in this case. Nevertheless, the chosen-plaintext tweak proposed in [34] is an interesting exception to this conclusion, as it leads to security-bounded hardware implementations for stateless primitives that are particularly interesting from an application point-of-view, e.g. for re-synchronization, challenge-response protocols, . . . 
Beyond the further analysis of such constructions, their extension to software implementations is an interesting scope for further research. In this respect, the combination of a chosen-plaintext leakage-resilient PRF with the shuffling countermeasure in [62] seems promising, as it could \"emulate\" the keydependent algorithmic noise ensuring security bounds in hardware. ", + "annotations": [ + { + "start": 690, + "end": 694, + "name": "bibliography_ref", + "value": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + { + "start": 1214, + "end": 1218, + "name": "bibliography_ref", + "value": "bac4e623-f290-11ee-a6ed-b88584b4e4a1" + } + ], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.16", + "text": "A Additional figures", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.17", + "text": "\n", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.17.0", + "text": "Acknowledgements. F.-X. Standaert is an associate researcher of the . Work funded in parts by the through the project (CRASH) and the grant B- project.", + "annotations": [], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.18", + "text": "

Acknowledgements. F.-X. Standaert is an associate researcher of the Belgian Fund for Scientific Research (FNRS-F.R.S.). Work funded in parts by the European Commission through the ERC project 280141 (CRASH) and the European ISEC action grant HOME/2010/ISEC/AG/INT-011 B-CCENTRE project.

", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.18.0", + "text": "Acknowledgements. F.-X. Standaert is an associate researcher of the . Work funded in parts by the through the project (CRASH) and the grant B- project.", + "annotations": [], + "metadata": { + "paragraph_type": "raw_text", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.19", + "text": "\n", + "annotations": [], + "metadata": { + "paragraph_type": "section", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20", + "text": "bibliography", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [ + { + "node_id": "0.20.0", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e42a-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e42a-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.0.0", + "text": "Leakage-resilient symmetric encryption via re-keying", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.0.1", + "text": "Bertoni and Coron", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.0.2", + "text": "\nMichelAbdalla\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.0.3", + "text": "\nSoniaBelaïd\n", + "annotations": [], + "metadata": { + 
"paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.0.4", + "text": "\nPierre-AlainFouque\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.0.5", + "text": "4", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.0.6", + "text": "471-488", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.1", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e432-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e432-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.1.0", + "text": "Towards fresh re-keying with leakage-resilient PRFs: Cipher design principles and analysis", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.1", + "text": "Cryptology ePrint Archive", + "annotations": [], + "metadata": { + "paragraph_type": "title_journal", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.2", + "text": "\nSoniaBelaïd\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.3", + "text": "\nFabrizioDe Santis\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + 
"node_id": "0.20.1.4", + "text": "\nJohannHeyszl\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.5", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.6", + "text": "\nMarcelMedwed\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.7", + "text": "\nJorn-MarcSchmidt\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.8", + "text": "\nFrancois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.9", + "text": "\nStefanTillich\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.1.10", + "text": "2013. 
2013", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.2", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e43e-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e43e-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.2.0", + "text": "Implementing \"practical leakage-resilient cryptography", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.2.1", + "text": "CHES 2012 Rump Session Talk", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.2.2", + "text": "\nJDaniel\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.2.3", + "text": "\nBernstein\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.2.4", + "text": "September 2012", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.3", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e444-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e444-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.3.0", + "text": "Cryptographic Hardware and Embedded Systems -CHES 2013 -15th International 
Workshop", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.3.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.3.2", + "text": "\nGuidoBertoni\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.3.3", + "text": "\nJean-SébastienCoron\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.3.4", + "text": "8086", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.3.5", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.3.6", + "text": "August 20-23, 2013. 
2013", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.4", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e44c-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e44c-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.4.0", + "text": "Towards sound approaches to counteract power-analysis attacks", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.4.1", + "text": "Wiener", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.4.2", + "text": "\nSureshChari\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.4.3", + "text": "\nCharanjitSJutla\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.4.4", + "text": "\nRJosyula\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.4.5", + "text": "\nPankajRao\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.4.6", + "text": "\nRohatgi\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + 
"node_id": "0.20.4.7", + "text": "63", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.4.8", + "text": "398-412", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.5", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e456-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e456-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.5.0", + "text": "Template attacks", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.5.1", + "text": "CHES", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.5.2", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.5.3", + "text": "\nSureshChari\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.5.4", + "text": "\nJosyulaRRao\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.5.5", + "text": "\nPankajRohatgi\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + 
"subparagraphs": [] + }, + { + "node_id": "0.20.5.6", + "text": "2523", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.5.7", + "text": "13-28", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.5.8", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.5.9", + "text": "2002", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.6", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e461-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e461-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.6.0", + "text": "Common Criteria Portal", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.7", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e463-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e463-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.7.0", + "text": "Side channel cryptanalysis of a higher order masking scheme", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.7.1", + "text": "Paillier 
and Verbauwhede", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.7.2", + "text": "\nJean-SébastienCoron\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.7.3", + "text": "\nEmmanuelProuff\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.7.4", + "text": "\nMatthieuRivain\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.7.5", + "text": "38", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.7.6", + "text": "28-44", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.8", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e46b-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e46b-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.8.0", + "text": "Cryptographic Key Length Recommendation", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.9", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + 
"uid": "bac4e46d-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e46d-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.9.0", + "text": "Leakage-resilient pseudorandom functions and side-channel attacks on feistel networks", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.9.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.9.2", + "text": "\nYevgeniyDodis\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.9.3", + "text": "\nKrzysztofPietrzak\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.9.4", + "text": "6223", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.9.5", + "text": "21-40", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.9.6", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.9.7", + "text": "2010", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.10", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": 
"bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e476-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e476-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.10.0", + "text": "Loïc van Oldeneel tot Oldenzeel, and Nicolas Veyrat-Charvillon. Efficient removal of random delays from embedded software implementations using hidden markov models", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.10.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.10.2", + "text": "\nFrançoisDurvaux\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.10.3", + "text": "\nMathieuRenauld\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.10.4", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.10.5", + "text": "7771", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.10.6", + "text": "123-140", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.10.7", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + 
"page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.10.8", + "text": "2012", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.11", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e480-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e480-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.11.0", + "text": "Leakage-resilient cryptography", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.11.1", + "text": "FOCS", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.11.2", + "text": "\nStefanDziembowski\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.11.3", + "text": "\nKrzysztofPietrzak\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.11.4", + "text": "293-302", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.11.5", + "text": "IEEE Computer Society", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.11.6", + "text": "2008", + "annotations": [], + "metadata": { + 
"paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.12", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e488-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e488-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.12.0", + "text": "François-Xavier Standaert, and Loïc van Oldeneel tot Oldenzeel. Compact implementation and performance evaluation of block ciphers in ATtiny devices", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.2", + "text": "\nThomasEisenbarth\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.3", + "text": "\nZhengGong\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.4", + "text": "\nTimGüneysu\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.5", + "text": "\nStefanHeyse\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.6", + "text": "\nSebastiaanIndesteege\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + 
"line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.7", + "text": "\nStéphanieKerckhof\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.8", + "text": "\nFrançoisKoeune\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.9", + "text": "\nTomislavNad\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.10", + "text": "\nThomasPlos\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.11", + "text": "\nFrancescoRegazzoni\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.12", + "text": "7374", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.13", + "text": "172-187", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.14", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.12.15", + "text": "2012", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": 
"0.20.13", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e499-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e499-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.13.0", + "text": "Europay Mastercard Visa", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.14", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e49b-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e49b-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.14.0", + "text": "Practical leakageresilient symmetric cryptography", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.14.1", + "text": "Prouff and Schaumont", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.14.2", + "text": "\nSebastianFaust\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.14.3", + "text": "\nKrzysztofPietrzak\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.14.4", + "text": "\nJoachimSchipper\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.14.5", + "text": 
"46", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.14.6", + "text": "213-232", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.15", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4a3-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4a3-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.15.0", + "text": "A statistical model for dpa with novel algorithmic confusion analysis", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.15.1", + "text": "Prouff and Schaumont", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.15.2", + "text": "\nYunsiFei\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.15.3", + "text": "\nQiasiLuo\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.15.4", + "text": "\nAAdamDing\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.15.5", + "text": "46", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + 
"subparagraphs": [] + }, + { + "node_id": "0.20.15.6", + "text": "233-250", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.16", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4ab-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4ab-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.16.0", + "text": "Thwarting higherorder side channel analysis with additive and multiplicative maskings", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.16.1", + "text": "\nLaurieGenelle\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.16.2", + "text": "\nEmmanuelProuff\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.16.3", + "text": "\nMichaëlQuisquater\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.16.4", + "text": "43", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.16.5", + "text": "240-255", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.17", + "text": "", + "annotations": [], + "metadata": { + 
"paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4b2-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4b2-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.17.0", + "text": "How to construct random functions (extended abstract)", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.17.1", + "text": "FOCS", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.17.2", + "text": "\nOdedGoldreich\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.17.3", + "text": "\nShafiGoldwasser\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.17.4", + "text": "\nSilvioMicali\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.17.5", + "text": "464-479", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.17.6", + "text": "IEEE Computer Society", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.17.7", + "text": "1984", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.18", + 
"text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4bb-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4bb-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.18.0", + "text": "Des and differential power analysis (the \"duplication\" method)", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.18.1", + "text": "CHES", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.18.2", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.18.3", + "text": "\nLouisGoubin\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.18.4", + "text": "\nJacquesPatarin\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.18.5", + "text": "1717", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.18.6", + "text": "158-172", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.18.7", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + 
"other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.18.8", + "text": "1999", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.19", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4c5-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4c5-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.19.0", + "text": "Masking vs. multiparty computation: How large is the gap for the AES?", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.19.1", + "text": "Bertoni and Coron", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.19.2", + "text": "\nVincentGrosso\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.19.3", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.19.4", + "text": "\nSebastianFaust\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.19.5", + "text": "4", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.19.6", + "text": "400-416", + "annotations": [], + "metadata": 
{ + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.20", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4cd-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4cd-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.20.0", + "text": "An AES smart card implementation resistant to power analysis attacks", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.20.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.20.2", + "text": "\nChristophHerbst\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.20.3", + "text": "\nElisabethOswald\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.20.4", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.20.5", + "text": "3989", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.20.6", + "text": "239-252", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + 
"node_id": "0.20.20.7", + "text": "2006", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.21", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4d6-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4d6-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.21.0", + "text": "Private circuits: Securing hardware against probing attacks", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.21.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.21.2", + "text": "\nYuvalIshai\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.21.3", + "text": "\nAmitSahai\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.21.4", + "text": "\nDavidWagner\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.21.5", + "text": "2729", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.21.6", + "text": "463-481", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + 
"other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.21.7", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.21.8", + "text": "2003", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.22", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4e0-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4e0-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.22.0", + "text": "Advances in Cryptology -EU-ROCRYPT 2013, 32nd Annual International Conference on the Theory and Applications of Cryptographic Techniques", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.22.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.22.2", + "text": "\nThomasJohansson\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.22.3", + "text": "\nPhongQNguyen\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.22.4", + "text": "7881", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + 
"node_id": "0.20.22.5", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.22.6", + "text": "May 26-30, 2013. 2013", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.23", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4e8-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4e8-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.23.0", + "text": "Advances in Cryptology -EUROCRYPT 2009, 28th Annual International Conference on the Theory and Applications of Cryptographic Techniques", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.23.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.23.2", + "text": "5479", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.23.3", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.23.4", + "text": "April 26-30, 2009. 
2009", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.24", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4ee-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4ee-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.24.0", + "text": "Evaluation of DPA characteristics of sasebo for board level simulation", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.24.1", + "text": "proceedings of COSADE 2010, 4 pages", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.24.2", + "text": "\nToshihiroKatashita\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.24.3", + "text": "\nAkashiSatoh\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.24.4", + "text": "\nKatsuyaKikuchi\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.24.5", + "text": "\nHiroshiNakagawa\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.24.6", + "text": "\nMasahiroAoyagi\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 
0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.24.7", + "text": "February 2011", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.25", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e4f7-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e4f7-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.25.0", + "text": "Towards green cryptography: A comparison of lightweight ciphers from the energy viewpoint", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.25.1", + "text": "Prouff and Schaumont", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.25.2", + "text": "\nStéphanieKerckhof\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.25.3", + "text": "\nFrançoisDurvaux\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.25.4", + "text": "\nCédricHocquet\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.25.5", + "text": "\nDavidBol\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.25.6", + "text": 
"\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.25.7", + "text": "46", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.25.8", + "text": "390-407", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.26", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e501-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e501-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.26.0", + "text": "Leak resistant cryptographic indexed key update", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.26.1", + "text": "\nCPaul\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.26.2", + "text": "\nKocher\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.27", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e505-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e505-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.27.0", + "text": "Differential power analysis", + "annotations": [], + "metadata": 
{ + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.27.1", + "text": "Wiener", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.27.2", + "text": "\nCPaul\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.27.3", + "text": "\nJoshuaKocher\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.27.4", + "text": "\nBenjaminJaffe\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.27.5", + "text": "\nJun\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.27.6", + "text": "63", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.27.7", + "text": "388-397", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.28", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e50e-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e50e-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.28.0", + "text": "Hardware countermeasures against DPA ? 
a statistical analysis of their effectiveness", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.28.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.28.2", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.28.3", + "text": "2964", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.28.4", + "text": "222-235", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.28.5", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.28.6", + "text": "2004", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.29", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e516-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e516-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.29.0", + "text": "Power analysis attacksrevealing the secrets of smart cards", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 
0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.29.1", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.29.2", + "text": "\nElisabethOswald\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.29.3", + "text": "\nThomasPopp\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.29.4", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.29.5", + "text": "2007", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.30", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e51d-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e51d-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.30.0", + "text": "One for allall for one: unifying standard differential power analysis attacks", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.30.1", + "text": "IET Information Security", + "annotations": [], + "metadata": { + "paragraph_type": "title_journal", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.30.2", + "text": "\nStefanMangard\n", + "annotations": [], + 
"metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.30.3", + "text": "\nElisabethOswald\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.30.4", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.30.5", + "text": "5", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.30.6", + "text": "100-110", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.30.7", + "text": "2011", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.31", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e526-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e526-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.31.0", + "text": "Side-channel leakage of masked cmos gates", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.2", + 
"text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.3", + "text": "\nThomasPopp\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.4", + "text": "\nMBerndt\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.5", + "text": "\nGammel\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.6", + "text": "3376", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.7", + "text": "351-365", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.8", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.31.9", + "text": "2005", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.32", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e531-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e531-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.32.0", + "text": "Successfully 
attacking masked AES hardware implementations", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.32.1", + "text": "Rao and Sunar", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.32.2", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.32.3", + "text": "\nNorbertPramstaller\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.32.4", + "text": "\nElisabethOswald\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.32.5", + "text": "47", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.32.6", + "text": "157-171", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.33", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e539-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.33.0", + "text": "Towards superexponential side-channel security with efficient leakage-resilient PRFs", + "annotations": [], + "metadata": { + "paragraph_type": "title", + 
"page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.33.1", + "text": "Prouff and Schaumont", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.33.2", + "text": "\nMarcelMedwed\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.33.3", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.33.4", + "text": "\nAntoineJoux\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.33.5", + "text": "46", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.33.6", + "text": "193-212", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.34", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e541-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e541-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.34.0", + "text": "Glitch-free implementation of masking in modern FPGAs", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.34.1", + "text": "HOST", + 
"annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.34.2", + "text": "\nAmirMoradi\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.34.3", + "text": "\nOliverMischke\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.34.4", + "text": "89-95", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.34.5", + "text": "IEEE", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.34.6", + "text": "2012", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.35", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e549-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e549-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.35.0", + "text": "Pushing the limits: A very compact and a threshold implementation of AES", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.35.1", + "text": "\nAmirMoradi\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + 
"subparagraphs": [] + }, + { + "node_id": "0.20.35.2", + "text": "\nAxelPoschmann\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.35.3", + "text": "\nSanLing\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.35.4", + "text": "\nChristofPaar\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.35.5", + "text": "\nHuaxiongWang\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.35.6", + "text": "69-88", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.36", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e551-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e551-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.36.0", + "text": "A side-channel analysis resistant description of the AES S-Box", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.2", + "text": "\nElisabethOswald\n", + "annotations": [], + "metadata": { + "paragraph_type": 
"author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.3", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.4", + "text": "\nNorbertPramstaller\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.5", + "text": "\nVincentRijmen\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.6", + "text": "3557", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.7", + "text": "413-423", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.8", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.36.9", + "text": "2005", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.37", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e55c-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e55c-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.37.0", + "text": "Cryptographic Hardware and Embedded Systems -CHES 2007, 9th International Workshop", + 
"annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.37.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.37.2", + "text": "4727", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.37.3", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.37.4", + "text": "September 10-13, 2007. 2007", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.38", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e562-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e562-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.38.0", + "text": "Advances in Cryptology -EUROCRYPT 2011 -30th Annual International Conference on the Theory and Applications of Cryptographic Techniques", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.38.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.38.2", + "text": "6632", + 
"annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.38.3", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.38.4", + "text": "May 15-19, 2011. 2011", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.39", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e568-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e568-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.39.0", + "text": "Improved higher-order side-channel attacks with FPGA experiments", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.39.1", + "text": "Rao and Sunar", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.39.2", + "text": "\nEricPeeters\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.39.3", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.39.4", + "text": "\nNicolasDonckers\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + 
}, + "subparagraphs": [] + }, + { + "node_id": "0.20.39.5", + "text": "\nJean-JacquesQuisquater\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.39.6", + "text": "47", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.39.7", + "text": "309-323", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.40", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e571-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e571-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.40.0", + "text": "A leakage-resilient mode of operation", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.40.1", + "text": "\nKrzysztofPietrzak\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.40.2", + "text": "462-482", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.41", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e575-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e575-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.41.0", + 
"text": "Evaluation of the masked logic style mdpl on a prototype chip", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.41.1", + "text": "Paillier and Verbauwhede", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.41.2", + "text": "\nThomasPopp\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.41.3", + "text": "\nMarioKirschbaum\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.41.4", + "text": "\nThomasZefferer\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.41.5", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.41.6", + "text": "38", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.41.7", + "text": "81-94", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.42", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e57e-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": 
"bac4e57e-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.42.0", + "text": "Cryptographic Hardware and Embedded Systems -CHES 2011 -13th International Workshop", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.42.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.42.2", + "text": "6917", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.42.3", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.42.4", + "text": "September 28 -October 1, 2011. 
2011", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.43", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e584-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e584-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.43.0", + "text": "Masking against side-channel attacks: A formal security proof", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.43.1", + "text": "Johansson and Nguyen", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.43.2", + "text": "\nEmmanuelProuff\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.43.3", + "text": "\nMatthieuRivain\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.43.4", + "text": "23", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.43.5", + "text": "142-159", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.44", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": 
"bac4e58b-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e58b-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.44.0", + "text": "Higher-order glitches free implementation of the AES using secure multi-party computation protocols", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.44.1", + "text": "\nEmmanuelProuff\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.44.2", + "text": "\nThomasRoche\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.44.3", + "text": "43", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.44.4", + "text": "63-78", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.45", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e591-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e591-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.45.0", + "text": "Cryptographic Hardware and Embedded Systems -CHES 2012 -14th International Workshop", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.45.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { 
+ "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.45.2", + "text": "7428", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.45.3", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.45.4", + "text": "September 9-12, 2012. 2012", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.46", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e597-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e597-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.46.0", + "text": "Cryptographic Hardware and Embedded Systems -CHES 2005, 7th International Workshop", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.46.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.46.2", + "text": "\nRJosyula\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.46.3", + "text": "\nBerkRao\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + 
"subparagraphs": [] + }, + { + "node_id": "0.20.46.4", + "text": "\nSunar\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.46.5", + "text": "3659", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.46.6", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.46.7", + "text": "August 29 -September 1, 2005. 2005", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.47", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5a0-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5a0-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.47.0", + "text": "FPGA implementations of the AES masked against power analysis attacks", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.47.1", + "text": "proceedings of COSADE 2011", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.47.2", + "text": "\nFrancescoRegazzoni\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.47.3", + "text": "\nWangYi\n", + "annotations": [], + "metadata": { 
+ "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.47.4", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.47.5", + "text": "56-66", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.47.6", + "text": "February 2011", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.48", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5a8-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5a8-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.48.0", + "text": "Algebraic side-channel attacks", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.48.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.48.2", + "text": "\nMathieuRenauld\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.48.3", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.48.4", + "text": 
"6151", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.48.5", + "text": "393-410", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.48.6", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.48.7", + "text": "2009", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.49", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5b1-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5b1-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.49.0", + "text": "Algebraic side-channel attacks on the AES: Why time also matters in DPA", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.1", + "text": "CHES", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.2", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.3", + "text": "\nFrançois-XavierMathieu Renauld\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + 
"other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.4", + "text": "\nNicolasStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.5", + "text": "\nVeyrat-Charvillon\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.6", + "text": "5747", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.7", + "text": "97-111", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.8", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.49.9", + "text": "2009", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.50", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5bc-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5bc-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.50.0", + "text": "A formal study of power variability issues and sidechannel attacks for nanoscale devices", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.50.1", + "text": "\nFrançois-XavierMathieu Renauld\n", 
+ "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.50.2", + "text": "\nNicolasStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.50.3", + "text": "\nDinaVeyrat-Charvillon\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.50.4", + "text": "\nDenisKamel\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.50.5", + "text": "\nFlandre\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.50.6", + "text": "109-128", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.51", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5c4-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5c4-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.51.0", + "text": "On the exact success rate of side channel analysis in the gaussian model", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.51.1", + "text": "Selected Areas in Cryptography", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + 
"other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.51.2", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.51.3", + "text": "\nMatthieuRivain\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.51.4", + "text": "5381", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.51.5", + "text": "165-183", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.51.6", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.51.7", + "text": "2008", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.52", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5cd-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5cd-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.52.0", + "text": "Provably secure higher-order masking of AES", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.52.1", + "text": "CHES", + "annotations": [], + "metadata": { + "paragraph_type": 
"title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.52.2", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.52.3", + "text": "\nMatthieuRivain\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.52.4", + "text": "\nEmmanuelProuff\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.52.5", + "text": "6225", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.52.6", + "text": "413-427", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.52.7", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.52.8", + "text": "2010", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.53", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5d7-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5d7-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.53.0", + "text": "Higher-order glitches free implementation of 
the AES using secure multi-party computation protocols extended version", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.53.1", + "text": "Cryptology ePrint Archive", + "annotations": [], + "metadata": { + "paragraph_type": "title_journal", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.53.2", + "text": "\nThomasRoche\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.53.3", + "text": "\nEmmanuelProuff\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.53.4", + "text": "2011/413, 2011", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.54", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5dd-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5dd-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.54.0", + "text": "Higher order masking of the AES", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.54.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.54.2", + "text": "\nKaiSchramm\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + 
"line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.54.3", + "text": "\nChristofPaar\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.54.4", + "text": "3860", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.54.5", + "text": "208-225", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.54.6", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.54.7", + "text": "2006", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.55", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5e6-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5e6-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.55.0", + "text": "A unified framework for the analysis of side-channel key recovery attacks", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.55.1", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.55.2", + "text": "\nTalMalkin\n", + 
"annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.55.3", + "text": "\nMotiYung\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.55.4", + "text": "443-461", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.56", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5ec-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5ec-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.56.0", + "text": "Leakage-resilient symmetric cryptography under empirically verifiable assumptions", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.56.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.56.2", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.56.3", + "text": "\nOlivierPereira\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.56.4", + "text": "\nYuYu\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + 
}, + "subparagraphs": [] + }, + { + "node_id": "0.20.56.5", + "text": "8042", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.56.6", + "text": "335-352", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.56.7", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.56.8", + "text": "2013", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.57", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e5f6-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e5f6-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.57.0", + "text": "Leakage resilient cryptography in practice", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.1", + "text": "Towards Hardware-Intrinsic Security, Information Security and Cryptography", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.2", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.3", + "text": "\nOlivierPereira\n", + "annotations": [], 
+ "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.4", + "text": "\nYuYu\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.5", + "text": "\nJean-JacquesQuisquater\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.6", + "text": "\nMotiYung\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.7", + "text": "\nElisabethOswald\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.8", + "text": "99-134", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.9", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.57.10", + "text": "2010", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.58", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e602-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e602-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.58.0", + "text": "The world is not enough: Another look on second-order 
DPA", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.2", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.3", + "text": "\nNicolasVeyrat-Charvillon\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.4", + "text": "\nElisabethOswald\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.5", + "text": "\nBenediktGierlichs\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.6", + "text": "\nMarcelMedwed\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.7", + "text": "\nMarkusKasper\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.8", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.9", + "text": "6477", + "annotations": [], + "metadata": { + 
"paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.10", + "text": "112-129", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.11", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.58.12", + "text": "2010", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.59", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e610-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e610-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.59.0", + "text": "The world is not enough: Another look on second-order DPA", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.59.1", + "text": "Cryptology ePrint Archive", + "annotations": [], + "metadata": { + "paragraph_type": "title_journal", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.59.2", + "text": "\nFrancois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.59.3", + "text": "\nNicolasVeyrat-Charvillon\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": 
"0.20.59.4", + "text": "\nElisabethOswald\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.59.5", + "text": "\nBenediktGierlichs\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.59.6", + "text": "\nMarcelMedwed\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.59.7", + "text": "\nMarkusKasper\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.59.8", + "text": "\nStefanMangard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.59.9", + "text": "2010/180. 
2010", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.60", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e61b-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e61b-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.60.0", + "text": "Security evaluations beyond computing power", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.60.1", + "text": "Johansson and Nguyen", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.60.2", + "text": "\nNicolasVeyrat-Charvillon\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.60.3", + "text": "\nBenoîtGérard\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.60.4", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.60.5", + "text": "23", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.60.6", + "text": "126-141", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + 
"subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.61", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e623-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e623-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.61.0", + "text": "Shuffling against side-channel attacks: A comprehensive study with cautionary note", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.2", + "text": "\nNicolasVeyrat-Charvillon\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.3", + "text": "\nMarcelMedwed\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.4", + "text": "\nStéphanieKerckhof\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.5", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.6", + "text": "7658", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.7", + "text": "740-757", + "annotations": 
[], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.8", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.61.9", + "text": "2012", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.62", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e62e-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e62e-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.62.0", + "text": "Advances in Cryptology -CRYPTO '99, 19th Annual International Cryptology Conference", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.62.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.62.2", + "text": "1666", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.62.3", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.62.4", + "text": "August 15-19, 1999. 
1999", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.63", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e634-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e634-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.63.0", + "text": "Practical leakage-resilient pseudorandom objects with minimum public randomness", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.63.1", + "text": "Lecture Notes in Computer Science", + "annotations": [], + "metadata": { + "paragraph_type": "title_series", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.63.2", + "text": "\nYuYu\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.63.3", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.63.4", + "text": "7779", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_volume", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.63.5", + "text": "223-238", + "annotations": [], + "metadata": { + "paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.63.6", + "text": "Springer", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + 
}, + "subparagraphs": [] + }, + { + "node_id": "0.20.63.7", + "text": "2013", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + }, + { + "node_id": "0.20.64", + "text": "", + "annotations": [], + "metadata": { + "paragraph_type": "bibliography_item", + "page_id": 0, + "line_id": 0, + "other_fields": { + "uid": "bac4e63d-f290-11ee-a6ed-b88584b4e4a1" + }, + "uid": "bac4e63d-f290-11ee-a6ed-b88584b4e4a1" + }, + "subparagraphs": [ + { + "node_id": "0.20.64.0", + "text": "Practical leakage-resilient pseudorandom generators", + "annotations": [], + "metadata": { + "paragraph_type": "title", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.64.1", + "text": "ACM Conference on Computer and Communications Security", + "annotations": [], + "metadata": { + "paragraph_type": "title_conference_proceedings", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.64.2", + "text": "\nYuYu\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.64.3", + "text": "\nFrançois-XavierStandaert\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.64.4", + "text": "\nOlivierPereira\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.64.5", + "text": "\nMotiYung\n", + "annotations": [], + "metadata": { + "paragraph_type": "author", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.64.6", + "text": "141-151", + "annotations": [], + "metadata": { + 
"paragraph_type": "biblScope_page", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.64.7", + "text": "ACM", + "annotations": [], + "metadata": { + "paragraph_type": "publisher", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + }, + { + "node_id": "0.20.64.8", + "text": "2010", + "annotations": [], + "metadata": { + "paragraph_type": "date", + "page_id": 0, + "line_id": 0, + "other_fields": {} + }, + "subparagraphs": [] + } + ] + } + ] + } + ] + }, + "tables": [ + { + "cells": [ + [ + { + "lines": [ + { + "text": "Software (8-bit)", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "code size", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "cycle", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "cost", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "physical", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "Implementations", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "(bytes)", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "count", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "function", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "assumptions", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "Unprotected [13]", + "annotations": [] + } + ], + "rowspan": 1, 
+ "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "1659", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "4557", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "7.560", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "-", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "1-mask Boolean [53]", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "3153", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "129 • 10 3", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "406.7", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "glitch-sensitive", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "1-mask polynomial [20, 45]", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "20 682", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "1064 • 10 3", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "22 000", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "glitch-resistant", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "2-mask Boolean [53]", + "annotations": [] + } + ], + 
"rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "3845", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "271 • 10 3", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "1042", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "glitch-sensitive", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "FPGA (Virtex-5)", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "area", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "throughput", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "cost", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "physical", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "Implementations", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "(slices)", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "(enc/sec)", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "function", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "assumptions", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "Unprotected (128-bit) [48] 1-mask Boolean (128-bit) 
[48] Threshold (8-bit) [36]", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "478 1462 958", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "245•10 6 11 100•10 6 11 170•10 6 266", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "21.46 160.8 1499", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "-glitch-sensitive glitch-resistant", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ] + ], + "metadata": { + "page_id": 0, + "uid": "d2ce350a-25be-4d05-9061-6f1d4cf8bdd1", + "rotated_angle": 0, + "title": "Table 1 .Performance of some illustrative AES implementations." + } + }, + { + "cells": [ + [ + { + "lines": [ + { + "text": "construction", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "glitches", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "leakage function (∀ i ∈ [1; 16])", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ], + [ + { + "lines": [ + { + "text": "8-bit software", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "Unprotected [13]", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + 
} + ], + [ + { + "lines": [ + { + "text": "Ref.", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + }, + { + "lines": [ + { + "text": "KSU", + "annotations": [] + } + ], + "rowspan": 1, + "colspan": 1, + "invisible": false + } + ] + ], + "metadata": { + "page_id": 0, + "uid": "6e093372-d147-4245-8aab-08ed5fe5c072", + "rotated_angle": 0, + "title": "Table 2 .List of our target implementations." + } + } + ] + }, + "metadata": { + "uid": "doc_uid_auto_bac4e6c0-f290-11ee-a6ed-b88584b4e4a1", + "file_name": "article.pdf", + "temporary_file_name": "1712241389_9.pdf", + "size": 2919334, + "modified_time": 1712241389, + "created_time": 1712241389, + "access_time": 1712241389, + "file_type": "application/pdf", + "other_fields": { + "producer": "MiKTeX pdfTeX-1.40.11", + "creator": "TeX", + "creation_date": 1392998486, + "modification_date": 1392998486 + }, + "producer": "MiKTeX pdfTeX-1.40.11", + "creator": "TeX", + "creation_date": 1392998486, + "modification_date": 1392998486 + }, + "version": "", + "warnings": [], + "attachments": [] +} \ No newline at end of file diff --git a/docs/source/_static/structure_examples/article.pdf b/docs/source/_static/structure_examples/article.pdf new file mode 100644 index 00000000..6c74f192 Binary files /dev/null and b/docs/source/_static/structure_examples/article.pdf differ diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 9163e2e9..a49a089d 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,11 +1,23 @@ Changelog ========= + +v2.2 (2024-04-17) +----------------- +Release note: `v2.2 `_ + +* `PdfTabbyReader` improved: bug fixes, speed increase of partial PDF extraction (with parameter `pages`). +* Added benchmarks for evaluation of PDF readers performance. +* Added `ReferenceAnnotation` class. +* Fixed bug in `can_read` method for all readers. 
+* Added `article` structure type for parsing scientific articles using `GROBID `_ (`ArticleReader`, `ArticleStructureExtractor`). + v2.1.1 (2024-03-21) +------------------- Release note: `v2.1.1 `_ * Update README.md. * Update table and time benchmarks. -* Re-label line-classifier datasets (law, diploma, paragraphs datasets). +* Re-label line-classifier datasets (law, tz, diploma, paragraphs datasets). * Update tasker creators (for the labeling system). * Fix HTML table parsing. diff --git a/docs/source/dedoc_api_usage/api.rst b/docs/source/dedoc_api_usage/api.rst index 0a7ee82b..b06b345d 100644 --- a/docs/source/dedoc_api_usage/api.rst +++ b/docs/source/dedoc_api_usage/api.rst @@ -79,9 +79,10 @@ Api parameters description * **other** -- structure for document of any domain (:ref:`other_structure`); * **law** -- Russian laws (:ref:`law_structure`); * **tz** -- Russian technical specifications (:ref:`tz_structure`); - * **diploma** -- Russian thesis (:ref:`diploma_structure`). + * **diploma** -- Russian thesis (:ref:`diploma_structure`); + * **article** -- scientific article (:ref:`article_structure`). - This type is used for choosing a specific structure extractor after document reading. + This type is used for choosing a specific structure extractor (and, in some cases, a specific reader). * - structure_type - tree, linear diff --git a/docs/source/dedoc_api_usage/api_schema.rst b/docs/source/dedoc_api_usage/api_schema.rst index be327ba9..e6c1c228 100644 --- a/docs/source/dedoc_api_usage/api_schema.rst +++ b/docs/source/dedoc_api_usage/api_schema.rst @@ -61,6 +61,7 @@ Json schema of the output is also available during dedoc application running on .. autoattribute:: page_id .. autoattribute:: uid .. autoattribute:: rotated_angle + .. autoattribute:: title .. 
autoclass:: dedoc.api.schema.CellWithMeta diff --git a/docs/source/index.rst b/docs/source/index.rst index a69d157c..92582bec 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -208,6 +208,7 @@ Currently the following domains can be handled: * Russian laws (:ref:`structure description `). * Russian technical specifications (:ref:`structure description `). * Russian thesis for bachelor or master degree (:ref:`structure description `). + * English scientific articles (:ref:`structure description `). For a document of unknown or unsupported domain there is an option to use default structure extractor (``document_type=other`` at :ref:`api_parameters`), the default document structure described :ref:`here `. @@ -255,6 +256,7 @@ For a document of unknown or unsupported domain there is an option to use defaul structure_types/law structure_types/tz structure_types/diploma + structure_types/article .. toctree:: diff --git a/docs/source/modules/data_structures.rst b/docs/source/modules/data_structures.rst index 0f166da8..35efcbc8 100644 --- a/docs/source/modules/data_structures.rst +++ b/docs/source/modules/data_structures.rst @@ -116,6 +116,12 @@ Concrete annotations .. autoattribute:: name +.. autoclass:: dedoc.data_structures.ReferenceAnnotation + :show-inheritance: + :special-members: __init__ + + .. autoattribute:: name + .. autoclass:: dedoc.data_structures.BBoxAnnotation :show-inheritance: :special-members: __init__ diff --git a/docs/source/modules/readers.rst b/docs/source/modules/readers.rst index 7666f8bf..7d160735 100644 --- a/docs/source/modules/readers.rst +++ b/docs/source/modules/readers.rst @@ -73,3 +73,7 @@ dedoc.readers .. autoclass:: dedoc.readers.RawTextReader :show-inheritance: :members: + +.. 
autoclass:: dedoc.readers.ArticleReader + :show-inheritance: + :members: diff --git a/docs/source/modules/structure_extractors.rst b/docs/source/modules/structure_extractors.rst index 79d80f0f..08655f06 100644 --- a/docs/source/modules/structure_extractors.rst +++ b/docs/source/modules/structure_extractors.rst @@ -52,3 +52,9 @@ dedoc.structure_extractors :members: .. autoattribute:: document_type + +.. autoclass:: dedoc.structure_extractors.ArticleStructureExtractor + :show-inheritance: + :members: + + .. autoattribute:: document_type diff --git a/docs/source/parameters/structure_type.rst b/docs/source/parameters/structure_type.rst index 546ddbfc..842f6afc 100644 --- a/docs/source/parameters/structure_type.rst +++ b/docs/source/parameters/structure_type.rst @@ -30,6 +30,8 @@ Structure type configuring In this case, :class:`~dedoc.structure_extractors.TzStructureExtractor` is used. * **diploma** -- Russian thesis (:ref:`diploma_structure`) In this case, :class:`~dedoc.structure_extractors.DiplomaStructureExtractor` is used. + * **article** -- scientific article (:ref:`article_structure`) + In this case, :class:`~dedoc.readers.ArticleReader` and :class:`~dedoc.structure_extractors.ArticleStructureExtractor` are used. If you use your custom configuration, look to the documentation of :class:`~dedoc.structure_extractors.StructureExtractorComposition` diff --git a/docs/source/structure_types/article.rst b/docs/source/structure_types/article.rst new file mode 100644 index 00000000..8de477e9 --- /dev/null +++ b/docs/source/structure_types/article.rst @@ -0,0 +1,213 @@ +.. _article_structure: + +Article structure type (GROBID) +=============================== + +This structure type is used for scientific article analysis using `GROBID `_ system. + + .. note:: + + In case you use dedoc as a library or a separate Docker image (without docker-compose). 
+ If you want to use this structure extractor, you should run GROBID service via Docker (or see `grobid running instruction `_). + + .. code-block:: shell + + docker run --rm --init --ulimit core=0 -p 8070:8070 lfoppiano/grobid:0.8.0 + +We analyze the recognition results from GROBID. The following types of objects are included in the resulting tree: + + * article's title; + * authors with their affiliations to organizations and emails; + * article's section headers (for example `Abstract`, `Introduction`, ..., `Conclusion`, etc.); + * tables and their content; + * bibliography; + * references to tables and bibliography items. + +There are the following line types in the article structure type: + + * ``root``; + * ``author`` (includes ``author_first_name``, ``author_surname``, ``email``); + * ``author_affiliation`` (includes ``org_name``, ``address``); + * ``abstract``; + * ``section``; + * ``bibliography``; + * ``bibliography_item`` (includes [``title`` | ``title_journal`` | ``title_series`` | ``title_conference_proceedings``], ``author``, ``biblScope_volume``, ``biblScope_pages``, ``DOI``, ``publisher``, ``date``); + * ``raw_text``. + + +You can see the :download:`example <../_static/structure_examples/article.pdf>` of the document of this structure type. +This page provides examples of this article analysis. + +Below is a description of nodes in the output tree: + + * **root**: node containing the text of the article title. + + There is only one root node in any document. + It is obligatory for any document of article type. + All other document lines are children of the root node. + We take the title's text from GROBID's TEI-XML path tag : + + .. code-block:: XML + + <fileDesc> + <titleStmt> + <title> Title's text // -> node.paragraph_type="root" + + + + * **author**: information about an author of the article. + + ``author`` nodes are children of the node ``root``. This type of node has subnodes. + + * ``author_first_name`` - tag in GROBID's output. 
The node doesn't have children nodes. + * ``author_surname`` - tag in GROBID's output. The node doesn't have children nodes. + * ``email`` - author's email, tag in GROBID's output. The node doesn't have children nodes. + * ``author_affiliation`` - author affiliation description. + + + GROBID's TEI-XML 's name information + + .. code-block:: XML + + // -> node.paragraph_type="author" + + Sonia // -> node.paragraph_type="author_first_name" + Belaïd // -> node.paragraph_type="author_surname" + // -> node.paragraph_type="email" + + ... + + + * **author_affiliation**: Author's affiliation description. + + ``author_affiliation`` nodes are children of the node ``author``. + This type of node has subnodes. + + * ``org_name`` - organization description, tag in GROBID's output. The node doesn't have children nodes. + * ``address`` - organization address,
tag in GROBID's output. The node doesn't have children nodes. + + GROBID's TEI-XML tag information according to the affiliation `description `_ : + + .. code-block:: XML + + // -> node.paragraph_type="author" + ... + // -> node.paragraph_type="author_affiliation" + ICTEAM/ELEN/Crypto Group // -> node.paragraph_type="org_name" + Université catholique de Louvain + 
+ Belgium +
+
+ + The result of parsing of the second author of the article: + + .. example of "node_id": "0.1" + + .. literalinclude:: ../_static/json_format_examples/article_example.json + :language: json + :lines: 125-198 + + * **abstract** is the article's abstract section ( tag in GROBID's output). + + * **section**: nodes of article sections (for example "Introduction", "Conclusion", "V Experiments ..." etc.). This type of node has a subnode ``raw_text``. ``section`` nodes are children of a node ``root``. + + * **bibliography** is the article's bibliography list which contains only ``bibliography_item`` nodes. + + * **bibliography_item** is the article's bibliography item description. + + ``bibliography_item`` nodes are children of the node ``bibliography``. + This type of node has subnodes. + + * ``title`` or ``title_journal`` or ``title_series`` or ``title_conference_proceedings``- name of the bibliography item. The node doesn't have children nodes. + * ``author`` - bibliography author name,
tag in GROBID's output. The node doesn't have children nodes. + * ``biblScope_volume`` - volume name, 4 tag in GROBID's output. The node doesn't have children nodes. + * ``biblScope_pages`` - page range, tag in GROBID's output. The node doesn't have children nodes. + * ``DOI`` - bibliography DOI name, tag in GROBID's output. The node doesn't have children nodes. + * ``publisher`` - publisher name, tag in GROBID's output. The node doesn't have children nodes. + * ``date`` - publication date, tag in GROBID's output. The node doesn't have children nodes. + + + + There is GROBID's TEI-XML 's item information description `here `_ . + We parse GROBID's biblStruct and create a ``bibliography_item`` node. Example of GROBID's biblStruct: + + .. code-block:: XML + + + + + Leakage-resilient symmetric encryption via re-keying + + MichelAbdalla + + + SoniaBelaïd + + + Pierre-AlainFouque + + + + Bertoni and Coron + + 4 + + + + + + + We set paragraph_type of the title according to the tag level in GROBID (see `title level's description `_): + + * For ``<title><level="a">`` set the ``paragraph_type="title"`` for article title or chapter title (but not thesis, see below). Here "a" stands for analytics (a part of a monograph). + * For ``<title><level="j">`` set the ``paragraph_type="title_journal"`` for journal title. + * For ``<title><level="s">`` set the ``paragraph_type="title_series"`` for series title (e.g. "Lecture Notes in Computer Science"). + * For ``<title><level="m">`` set the ``paragraph_type="title_conference_proceedings"`` for non journal bibliographical item holding the cited article, e.g. conference proceedings title. Note if a book is cited, the title of the book is annotated with ``<title level="m">``. + + We present a bibliography item as the node with fields ``paragraph_type="bibliography_item"`` and unique id ``uid="uuid"``. + All ``bibliography_item`` nodes are children of the ``bibliography`` node. 
+ The example of the bibliography item parsing of the article in dedoc: + + .. example of "node_id": "0.20.5" + + .. literalinclude:: ../_static/json_format_examples/article_example.json + :language: json + :lines: 1745-1880 + + + * **bibliography references**: bibliography references in annotations of the article's text. + + Text can contain references to ``bibliography_item`` nodes. + For example, "Authors in [5] describe an approach ...". Here "[5]" is the reference. + We present the bibliography reference as the annotation with ``name="bibliography_ref"`` and value of bibliography item's uuid. + See documentation of the class :class:`~dedoc.data_structures.ReferenceAnnotation` for more details. + + Example of a bibliography reference in dedoc is given below. + There is a textual node with two bibliography references (with two annotations): + + .. example of "node_id": "0.15.0" + + .. literalinclude:: ../_static/json_format_examples/article_example.json + :language: json + :lines: 1085-1109 + + In the example, the annotations reference two ``bibliography_item`` nodes: + + .. example of "node_id": "0.20.33" + + .. literalinclude:: ../_static/json_format_examples/article_example.json + :language: json + :lines: 4581-4593 + + .. example of "node_id": "0.20.61" + + .. literalinclude:: ../_static/json_format_examples/article_example.json + :language: json + :lines: 7501-7513 + + * **raw_text**: node referring to a simple document line. + + It has the least importance in the document tree hierarchy, + so it is situated in the leaves of the tree. + It is nested in the node corresponding to the previous line with a more important type. 
diff --git a/labeling/tests/run_tests_in_docker.sh b/labeling/tests/run_tests_in_docker.sh index 40ef206c..0db255fb 100755 --- a/labeling/tests/run_tests_in_docker.sh +++ b/labeling/tests/run_tests_in_docker.sh @@ -2,7 +2,7 @@ if [ "$is_test" = "true" ] then apt install -y cowsay echo "run tests" - sleep 5 + sleep 25 python3 -m unittest -v -f /labeling_root/labeling/tests/test* test_exit_code=$? if [ $test_exit_code -eq 0 ] diff --git a/requirements.txt b/requirements.txt index 3b967a2c..30469034 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ pdf2image==1.10.0 #1.14.0 - there are converting artifacts '№' != '№\n\x0c' pdfminer.six==20211012 piexif==1.1.3 pylzma==0.5.0 +pypdf==4.1.0 PyPDF2==1.27.0 pytesseract==0.3.10 python-docx==0.8.11 diff --git a/resources/benchmarks/benchmark_pdf_performance.html b/resources/benchmarks/benchmark_pdf_performance.html new file mode 100644 index 00000000..e2010e6d --- /dev/null +++ b/resources/benchmarks/benchmark_pdf_performance.html @@ -0,0 +1,236 @@ +<html> + <head> + <title>PDF performance benchmark + + + + +

Running parameters:

{}
+ +
+1 Common (1-19 pages) (37 files) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FilenamePagespdf_with_text_layer
tabbytrueautoauto_tabbyaverage
big_table_with_merged_cells.pdf10.69±0.03 (0.69±0.03 / page)0.54±0.01 (0.54±0.01 / page)2.70±0.05 (2.70±0.05 / page)2.63±0.02 (2.63±0.02 / page)1.64±1.03 (1.64±1.03 / page)
VVP_global_table.pdf10.78±0.06 (0.78±0.06 / page)1.00±0.03 (1.00±0.03 / page)3.23±0.06 (3.23±0.06 / page)3.20±0.05 (3.20±0.05 / page)2.05±1.17 (2.05±1.17 / page)
пример.pdf10.79±0.01 (0.79±0.01 / page)0.92±0.00 (0.92±0.00 / page)1.75±0.02 (1.75±0.02 / page)1.59±0.02 (1.59±0.02 / page)1.26±0.41 (1.26±0.41 / page)
not_table.pdf11.33±0.02 (1.33±0.02 / page)1.63±0.01 (1.63±0.01 / page)5.60±0.03 (5.60±0.03 / page)5.62±0.06 (5.62±0.06 / page)3.55±2.07 (3.55±2.07 / page)
english_doc.pdf10.61±0.02 (0.61±0.02 / page)0.57±0.01 (0.57±0.01 / page)1.18±0.03 (1.18±0.03 / page)1.21±0.03 (1.21±0.03 / page)0.89±0.31 (0.89±0.31 / page)
liters_state.pdf11.03±0.04 (1.03±0.04 / page)0.63±0.01 (0.63±0.01 / page)1.65±0.06 (1.65±0.06 / page)2.03±0.04 (2.03±0.04 / page)1.33±0.54 (1.33±0.54 / page)
doc_with_long_list.pdf10.59±0.01 (0.59±0.01 / page)0.27±0.01 (0.27±0.01 / page)0.85±0.02 (0.85±0.02 / page)1.18±0.03 (1.18±0.03 / page)0.72±0.34 (0.72±0.34 / page)
example.pdf10.88±0.06 (0.88±0.06 / page)1.06±0.03 (1.06±0.03 / page)1.92±0.04 (1.92±0.04 / page)1.74±0.03 (1.74±0.03 / page)1.40±0.44 (1.40±0.44 / page)
2-column-state.pdf11.01±0.04 (1.01±0.04 / page)0.59±0.01 (0.59±0.01 / page)1.57±0.03 (1.57±0.03 / page)1.99±0.03 (1.99±0.03 / page)1.29±0.53 (1.29±0.53 / page)
14_dev_direct.pdf10.67±0.02 (0.67±0.02 / page)0.37±0.01 (0.37±0.01 / page)1.04±0.02 (1.04±0.02 / page)1.36±0.03 (1.36±0.03 / page)0.86±0.37 (0.86±0.37 / page)
example_table_with_90_orient_cells.pdf11.05±0.05 (1.05±0.05 / page)2.35±0.02 (2.35±0.02 / page)4.61±0.04 (4.61±0.04 / page)4.67±0.04 (4.67±0.04 / page)3.17±1.54 (3.17±1.54 / page)
two_column_document.pdf20.98±0.06 (0.49±0.03 / page)1.07±0.02 (0.54±0.01 / page)2.00±0.04 (1.00±0.02 / page)1.98±0.07 (0.99±0.03 / page)1.51±0.48 (0.75±0.24 / page)
example_mp_table_wo_repeate_header.pdf20.92±0.07 (0.46±0.04 / page)1.52±0.02 (0.76±0.01 / page)2.51±0.06 (1.25±0.03 / page)1.87±0.07 (0.94±0.04 / page)1.70±0.58 (0.85±0.29 / page)
mixed_pdf.pdf20.82±0.02 (0.41±0.01 / page)0.95±0.01 (0.48±0.01 / page)4.05±0.03 (2.03±0.01 / page)3.94±0.05 (1.97±0.02 / page)2.44±1.56 (1.22±0.78 / page)
example_mp_table_with_repeate_header_2.pdf20.95±0.05 (0.48±0.03 / page)1.61±0.03 (0.81±0.01 / page)2.54±0.08 (1.27±0.04 / page)1.95±0.08 (0.97±0.04 / page)1.76±0.58 (0.88±0.29 / page)
0004057v1.pdf21.00±0.03 (0.50±0.02 / page)0.87±0.01 (0.44±0.00 / page)1.92±0.03 (0.96±0.02 / page)2.03±0.07 (1.01±0.03 / page)1.45±0.52 (0.73±0.26 / page)
Document635.pdf21.67±0.04 (0.83±0.02 / page)3.13±0.08 (1.57±0.04 / page)4.82±0.07 (2.41±0.04 / page)3.37±0.08 (1.68±0.04 / page)3.25±1.12 (1.62±0.56 / page)
example_table_with_270_orient_cells.pdf21.14±0.02 (0.57±0.01 / page)2.67±0.07 (1.33±0.03 / page)6.54±0.07 (3.27±0.03 / page)6.50±0.03 (3.25±0.02 / page)4.21±2.37 (2.11±1.19 / page)
VVP_6_tables.pdf31.20±0.01 (0.40±0.00 / page)2.75±0.04 (0.92±0.01 / page)4.01±0.08 (1.34±0.03 / page)2.42±0.04 (0.81±0.01 / page)2.60±1.00 (0.87±0.33 / page)
example_with_table9.pdf31.26±0.04 (0.42±0.01 / page)5.32±0.05 (1.77±0.02 / page)12.47±0.06 (4.16±0.02 / page)12.38±0.01 (4.13±0.00 / page)7.86±4.79 (2.62±1.60 / page)
multipage_table.pdf31.10±0.06 (0.37±0.02 / page)3.56±0.03 (1.19±0.01 / page)9.10±0.10 (3.03±0.03 / page)9.05±0.07 (3.02±0.02 / page)5.70±3.48 (1.90±1.16 / page)
liao2020_merged_organized.pdf41.79±0.05 (0.45±0.01 / page)2.25±0.04 (0.56±0.01 / page)3.96±0.05 (0.99±0.01 / page)3.61±0.05 (0.90±0.01 / page)2.90±0.91 (0.73±0.23 / page)
with_header_footer_2.pdf51.46±0.01 (0.29±0.00 / page)1.98±0.01 (0.40±0.00 / page)3.44±0.04 (0.69±0.01 / page)2.87±0.03 (0.57±0.01 / page)2.44±0.77 (0.49±0.15 / page)
short_lines.pdf50.91±0.01 (0.18±0.00 / page)1.09±0.01 (0.22±0.00 / page)2.04±0.02 (0.41±0.00 / page)1.81±0.01 (0.36±0.00 / page)1.46±0.48 (0.29±0.10 / page)
prospectus.pdf69.00±0.14 (1.50±0.02 / page)12.49±0.13 (2.08±0.02 / page)21.28±0.06 (3.55±0.01 / page)17.96±0.09 (2.99±0.02 / page)15.18±4.76 (2.53±0.79 / page)
dogovor-oferty.pdf71.62±0.03 (0.23±0.00 / page)3.34±0.02 (0.48±0.00 / page)4.94±0.09 (0.71±0.01 / page)3.25±0.11 (0.46±0.02 / page)3.29±1.18 (0.47±0.17 / page)
Алан Тьюринг.pdf82.18±0.04 (0.27±0.01 / page)3.89±0.07 (0.49±0.01 / page)5.99±0.03 (0.75±0.00 / page)4.38±0.06 (0.55±0.01 / page)4.11±1.36 (0.51±0.17 / page)
multipage.pdf90.95±0.02 (0.11±0.00 / page)1.73±0.01 (0.19±0.00 / page)2.61±0.04 (0.29±0.00 / page)1.86±0.03 (0.21±0.00 / page)1.79±0.59 (0.20±0.07 / page)
with_changed_header_footer.pdf103.00±0.13 (0.30±0.01 / page)8.88±0.09 (0.89±0.01 / page)11.12±0.17 (1.11±0.02 / page)5.09±0.14 (0.51±0.01 / page)7.02±3.17 (0.70±0.32 / page)
2212.14834.pdf126.57±0.11 (0.55±0.01 / page)17.99±0.19 (1.50±0.02 / page)22.99±0.20 (1.92±0.02 / page)11.70±0.12 (0.98±0.01 / page)14.82±6.22 (1.23±0.52 / page)
s00371-018-1491-0.pdf135.53±0.07 (0.43±0.01 / page)12.59±0.13 (0.97±0.01 / page)16.19±0.14 (1.25±0.01 / page)9.11±0.09 (0.70±0.01 / page)10.86±3.97 (0.84±0.31 / page)
Successful_SAT_Encoding_Techniques.pdf132.74±0.05 (0.21±0.00 / page)7.72±0.14 (0.59±0.01 / page)9.68±0.13 (0.74±0.01 / page)4.87±0.08 (0.37±0.01 / page)6.25±2.66 (0.48±0.20 / page)
WAIT23_paper_1.pdf143.74±0.14 (0.27±0.01 / page)9.96±0.12 (0.71±0.01 / page)12.64±0.12 (0.90±0.01 / page)6.30±0.06 (0.45±0.00 / page)8.16±3.40 (0.58±0.24 / page)
S0965542513120129.pdf153.36±0.04 (0.22±0.00 / page)17.96±0.26 (1.20±0.02 / page)20.22±0.22 (1.35±0.01 / page)5.81±0.09 (0.39±0.01 / page)11.84±7.35 (0.79±0.49 / page)
INFORSID_2017_paper_34.pdf163.42±0.11 (0.21±0.01 / page)9.83±0.07 (0.61±0.00 / page)11.98±0.05 (0.75±0.00 / page)5.47±0.01 (0.34±0.00 / page)7.68±3.40 (0.48±0.21 / page)
1901.10861.pdf194.24±0.12 (0.22±0.01 / page)15.54±0.18 (0.82±0.01 / page)17.95±0.20 (0.94±0.01 / page)6.74±0.12 (0.35±0.01 / page)11.11±5.76 (0.58±0.30 / page)
applsci-12-04943.pdf196.10±0.13 (0.32±0.01 / page)29.97±0.20 (1.58±0.01 / page)33.14±0.35 (1.74±0.02 / page)9.48±0.07 (0.50±0.00 / page)19.67±12.00 (1.04±0.63 / page)
average (per page)0.54±0.330.89±0.521.75±1.221.53±1.311.18±1.06
+
+
+2 Common (20-99 pages) (19 files) + + + + + + + + + + + + + + + + + + + + + + + +
FilenamePagespdf_with_text_layer
tabbytrueautoauto_tabbyaverage
IVMEM2022_paper_2.pdf205.17±0.13 (0.26±0.01 / page)30.96±0.27 (1.55±0.01 / page)34.55±0.25 (1.73±0.01 / page)8.59±0.12 (0.43±0.01 / page)19.82±13.06 (0.99±0.65 / page)
4d9f_7b15_A-Worldwide-Survey-of-Encryption-Products.pdf224.72±0.08 (0.21±0.00 / page)14.66±0.26 (0.67±0.01 / page)16.53±0.23 (0.75±0.01 / page)6.56±0.08 (0.30±0.00 / page)10.62±5.07 (0.48±0.23 / page)
4-МГУ-Тулин-Д-И.pdf243.91±0.04 (0.16±0.00 / page)14.74±0.15 (0.61±0.01 / page)16.76±0.17 (0.70±0.01 / page)6.02±0.03 (0.25±0.00 / page)10.36±5.49 (0.43±0.23 / page)
4-МГУ-Попов-М-С.pdf243.73±0.10 (0.16±0.00 / page)16.46±0.16 (0.69±0.01 / page)18.62±0.13 (0.78±0.01 / page)5.85±0.05 (0.24±0.00 / page)11.17±6.47 (0.47±0.27 / page)
EtatArtRecSys15fev2021.pdf245.38±0.04 (0.22±0.00 / page)22.15±0.20 (0.92±0.01 / page)24.75±0.21 (1.03±0.01 / page)8.02±0.06 (0.33±0.00 / page)15.08±8.48 (0.63±0.35 / page)
US_DHS AS_2021.pdf254.42±0.08 (0.18±0.00 / page)21.16±0.21 (0.85±0.01 / page)23.72±0.15 (0.95±0.01 / page)7.19±0.08 (0.29±0.00 / page)14.12±8.43 (0.56±0.34 / page)
pbedrin_diploma.pdf285.15±0.08 (0.18±0.00 / page)17.15±0.16 (0.61±0.01 / page)19.00±0.21 (0.68±0.01 / page)7.00±0.04 (0.25±0.00 / page)12.08±6.07 (0.43±0.22 / page)
DK_FSP_2018.pdf285.40±0.07 (0.19±0.00 / page)27.79±0.28 (0.99±0.01 / page)30.32±0.12 (1.08±0.00 / page)7.91±0.06 (0.28±0.00 / page)17.86±11.27 (0.64±0.40 / page)
2111.15664.pdf2912.72±0.15 (0.44±0.01 / page)47.80±0.34 (1.65±0.01 / page)52.20±0.47 (1.80±0.02 / page)17.21±0.10 (0.59±0.00 / page)32.48±17.66 (1.12±0.61 / page)
4-физтех-Шишацкий-М-Н-230301.pdf306.03±0.10 (0.20±0.00 / page)42.41±0.26 (1.41±0.01 / page)45.24±0.13 (1.51±0.00 / page)8.81±0.16 (0.29±0.01 / page)25.62±18.26 (0.85±0.61 / page)
romanov_diplom.pdf335.57±0.06 (0.17±0.00 / page)24.20±0.24 (0.73±0.01 / page)26.96±0.17 (0.82±0.01 / page)8.30±0.09 (0.25±0.00 / page)16.26±9.42 (0.49±0.29 / page)
J93-2003.pdf5010.75±0.13 (0.21±0.00 / page)37.15±0.49 (0.74±0.01 / page)40.22±0.43 (0.80±0.01 / page)13.81±0.15 (0.28±0.00 / page)25.48±13.30 (0.51±0.27 / page)
diploma.pdf556.48±0.06 (0.12±0.00 / page)32.26±0.40 (0.59±0.01 / page)34.27±0.37 (0.62±0.01 / page)8.50±0.16 (0.15±0.00 / page)20.38±12.93 (0.37±0.24 / page)
s11263-020-01359-2.pdf5715.50±0.16 (0.27±0.00 / page)69.17±0.59 (1.21±0.01 / page)73.52±0.59 (1.29±0.01 / page)20.12±0.59 (0.35±0.01 / page)44.58±26.87 (0.78±0.47 / page)
FI_DR_2021.pdf6510.28±0.13 (0.16±0.00 / page)30.52±0.33 (0.47±0.01 / page)34.09±0.33 (0.52±0.01 / page)13.73±0.06 (0.21±0.00 / page)22.15±10.31 (0.34±0.16 / page)
FULLTEXT01.pdf6914.38±0.29 (0.21±0.00 / page)108.70±0.78 (1.58±0.01 / page)111.27±0.71 (1.61±0.01 / page)17.10±0.20 (0.25±0.00 / page)62.86±47.15 (0.91±0.68 / page)
FI_AS_2021.pdf7313.34±0.22 (0.18±0.00 / page)159.51±0.74 (2.19±0.01 / page)162.66±0.59 (2.23±0.01 / page)16.45±0.10 (0.23±0.00 / page)87.99±73.11 (1.21±1.00 / page)
АксеноваЕЛ_628.pdf747.81±0.15 (0.11±0.00 / page)53.75±0.35 (0.73±0.00 / page)55.60±0.43 (0.75±0.01 / page)9.68±0.06 (0.13±0.00 / page)31.71±22.99 (0.43±0.31 / page)
Научно-технический_отчет_(заключительный)_по_договору.pdf8317.85±3.65 (0.22±0.04 / page)414.40±3.30 (4.99±0.04 / page)416.25±3.49 (5.02±0.04 / page)19.78±3.37 (0.24±0.04 / page)217.07±198.29 (2.62±2.39 / page)
average (per page)0.20±0.071.22±1.001.30±0.990.28±0.100.75±0.87
+
+
+3 Common (100-500 pages) (6 files) + + + + + + + + + + +
FilenamePagespdf_with_text_layer
tabbytrueautoauto_tabbyaverage
FI_SDP_2012.pdf12111.36±0.16 (0.09±0.00 / page)91.62±0.51 (0.76±0.00 / page)93.79±0.33 (0.78±0.00 / page)13.73±0.23 (0.11±0.00 / page)52.62±40.10 (0.43±0.33 / page)
FI_SDP_2009.pdf14213.41±0.20 (0.09±0.00 / page)68.36±0.56 (0.48±0.00 / page)71.24±0.35 (0.50±0.00 / page)15.82±0.07 (0.11±0.00 / page)42.21±27.63 (0.30±0.19 / page)
1_Гарри_Поттер_и_философский_камень.pdf24415.80±0.36 (0.06±0.00 / page)257.90±1.86 (1.06±0.01 / page)269.97±1.02 (1.11±0.00 / page)28.16±0.10 (0.12±0.00 / page)142.96±121.13 (0.59±0.50 / page)
2.Гарри_Поттер_и_Тайная_комната.pdf33129.36±0.42 (0.09±0.00 / page)333.94±0.86 (1.01±0.00 / page)348.22±0.65 (1.05±0.00 / page)43.24±0.26 (0.13±0.00 / page)188.69±152.55 (0.57±0.46 / page)
3.Гарри_Поттер_и_узник_Азкабана.pdf40234.64±0.48 (0.09±0.00 / page)440.42±1.95 (1.10±0.00 / page)453.02±0.98 (1.13±0.00 / page)48.17±0.12 (0.12±0.00 / page)244.06±202.77 (0.61±0.50 / page)
dbguide.pdf52139.39±0.56 (0.08±0.00 / page)2188.78±7.38 (4.20±0.01 / page)2198.74±4.19 (4.22±0.01 / page)47.34±0.60 (0.09±0.00 / page)1118.56±1075.22 (2.15±2.06 / page)
average (per page)0.08±0.011.43±1.261.46±1.250.11±0.010.77±1.11
+
+
+4 Common (750+ pages) (4 files) + + + + + + + + +
FilenamePagespdf_with_text_layer
tabbytrueautoauto_tabbyaverage
7.Гарри_Поттер_и_Дары_Смерти.pdf76864.41±0.68 (0.08±0.00 / page)1227.33±3.57 (1.60±0.00 / page)1243.04±1.88 (1.62±0.00 / page)81.81±4.67 (0.11±0.01 / page)654.15±581.10 (0.85±0.76 / page)
Python-k-vershinam-masterstva_RuLit_Me_639739.pdf76961.92±1.44 (0.08±0.00 / page)1934.12±1.49 (2.52±0.00 / page)1948.38±3.14 (2.53±0.00 / page)69.99±0.84 (0.09±0.00 / page)1003.60±937.67 (1.31±1.22 / page)
5.Гарри_Поттер_и_Орден_Феникса.pdf96781.26±1.67 (0.08±0.00 / page)1815.83±1.87 (1.88±0.00 / page)1838.23±2.16 (1.90±0.00 / page)95.31±0.91 (0.10±0.00 / page)957.66±869.43 (0.99±0.90 / page)
NNDesign.pdf1012108.70±1.69 (0.11±0.00 / page)22116.06±15.78 (21.85±0.02 / page)22224.07±83.69 (21.96±0.08 / page)132.00±9.06 (0.13±0.01 / page)11145.21±11025.01 (11.01±10.89 / page)
average (per page)0.09±0.016.96±8.607.00±8.640.11±0.023.54±7.00
+
+
+5 FinTOC 2022 (en, 1-19 pages) (6 files) + + + + + + + + + + +
FilenamePagespdf_with_text_layer
tabbytrueautoauto_tabbyaverage
EdR_Private_Equity_Select_Access_Fund_S.A._SICAV-SIF-Amethis_II__Sub-Fund_2018_K_X_X_X.pdf32.07±0.25 (0.69±0.08 / page)3.41±0.17 (1.14±0.06 / page)5.35±0.26 (1.78±0.09 / page)3.94±0.11 (1.31±0.04 / page)3.69±1.19 (1.23±0.40 / page)
LU0992626050-FR0011269182-FR0011269190-FR0010312660-FR0010148981-LU0992625912-LU0992625839-LU0992626134_English_2012_Carmignac.pdf124.44±0.54 (0.37±0.05 / page)12.27±0.62 (1.02±0.05 / page)15.12±0.72 (1.26±0.06 / page)7.01±0.16 (0.58±0.01 / page)9.71±4.25 (0.81±0.35 / page)
CH0002775168_English_2016_BSIMultihelvetia.pdf166.06±0.41 (0.38±0.03 / page)17.97±0.87 (1.12±0.05 / page)21.24±0.54 (1.33±0.03 / page)9.58±0.28 (0.60±0.02 / page)13.72±6.16 (0.86±0.38 / page)
LU0035346187-LU0035345882_English_2012_UBS-Lux-BondFundGBPP.pdf1710.76±0.80 (0.63±0.05 / page)25.54±0.55 (1.50±0.03 / page)31.19±0.70 (1.83±0.04 / page)16.16±0.49 (0.95±0.03 / page)20.91±7.98 (1.23±0.47 / page)
LU0415166403-LU0033050237-LU0033049577_English_2012_UBS-Lux-BondFundEUR.pdf1711.03±0.61 (0.65±0.04 / page)25.70±0.92 (1.51±0.05 / page)31.17±1.06 (1.83±0.06 / page)16.36±0.17 (0.96±0.01 / page)21.06±7.89 (1.24±0.46 / page)
LU0214904665-LU0214905043_English_2012_UBS-Lux-BSEmergingEuropeEURP.pdf188.58±0.49 (0.48±0.03 / page)18.57±0.75 (1.03±0.04 / page)22.47±0.77 (1.25±0.04 / page)12.44±0.33 (0.69±0.02 / page)15.51±5.41 (0.86±0.30 / page)
average (per page)0.53±0.141.22±0.211.55±0.280.85±0.261.04±0.44
+
+
+6 FinTOC 2022 (en, 20-99 pages) (61 files) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FilenamePagespdf_with_text_layer
tabbytrueautoauto_tabbyaverage
Credit_Suisse_Portfolio_Fund_(Lux)_2011_X_P_X_X.pdf236.69±0.10 (0.29±0.00 / page)17.85±0.08 (0.78±0.00 / page)20.77±0.08 (0.90±0.00 / page)9.91±0.06 (0.43±0.00 / page)13.81±5.72 (0.60±0.25 / page)
Bantleon_AnleihenFonds_LU0524467833_2012_X_P_R_A.pdf244.68±0.07 (0.19±0.00 / page)16.83±0.04 (0.70±0.00 / page)18.77±0.11 (0.78±0.00 / page)6.76±0.01 (0.28±0.00 / page)11.76±6.12 (0.49±0.26 / page)
LU0129337548-LU0129337381_English_2009_CS-Lux-EuropeanPropertyEquityFd-.pdf246.98±0.04 (0.29±0.00 / page)25.75±0.22 (1.07±0.01 / page)29.10±0.82 (1.21±0.03 / page)10.13±0.06 (0.42±0.00 / page)17.99±9.58 (0.75±0.40 / page)
LU0118405827_English_2013_SEBConceptBiotechnology.pdf294.65±0.03 (0.16±0.00 / page)13.79±0.03 (0.48±0.00 / page)15.55±0.56 (0.54±0.02 / page)6.42±0.04 (0.22±0.00 / page)10.10±4.66 (0.35±0.16 / page)
Columbus_EM_Debt_Strategy_FR0013204161_2018_K_P_R_X.pdf294.96±0.04 (0.17±0.00 / page)18.65±0.10 (0.64±0.00 / page)20.77±0.32 (0.72±0.01 / page)7.28±0.03 (0.25±0.00 / page)12.92±6.89 (0.45±0.24 / page)
LU0337413677-LU0634998545-LU0337414568-LU0764661145-LU0764660501-LU0337413834-LU0764661574-LU0764660840-LU0371477885-LU0634998206-LU0337414139-LU0337414642-LU0634998461-LU0337414485-LU0834220419_English_2012_Bantleon.pdf325.95±0.07 (0.19±0.00 / page)23.17±0.20 (0.72±0.01 / page)25.05±0.16 (0.78±0.01 / page)8.25±0.05 (0.26±0.00 / page)15.61±8.57 (0.49±0.27 / page)
LU0116762864-LU0041228874_English_2011_Deka-Renten-Euro3-7CF.pdf345.26±0.01 (0.15±0.00 / page)21.61±0.21 (0.64±0.01 / page)23.31±0.10 (0.69±0.00 / page)7.52±0.11 (0.22±0.00 / page)14.43±8.10 (0.42±0.24 / page)
LU0097655574-LU0097654924_English_2012_Deka-EuroStocks.pdf355.36±0.04 (0.15±0.00 / page)23.66±0.24 (0.68±0.01 / page)25.35±0.12 (0.72±0.00 / page)7.57±0.08 (0.22±0.00 / page)15.49±9.08 (0.44±0.26 / page)
Credit_Suisse_Fund_I_(Lux)_2012_X_P_X_X.pdf369.90±0.21 (0.28±0.01 / page)31.38±0.32 (0.87±0.01 / page)34.62±0.78 (0.96±0.02 / page)13.41±0.11 (0.37±0.00 / page)22.33±10.81 (0.62±0.30 / page)
LU0456547701-LU0120526693_English_2013_SEBHighYield.pdf375.59±0.04 (0.15±0.00 / page)19.79±0.16 (0.53±0.00 / page)21.37±0.13 (0.58±0.00 / page)7.55±0.01 (0.20±0.00 / page)13.58±7.06 (0.37±0.19 / page)
BI_SICAV_2011_X_P_X_X.pdf389.57±0.14 (0.25±0.00 / page)53.12±0.38 (1.40±0.01 / page)55.43±0.05 (1.46±0.00 / page)12.80±0.06 (0.34±0.00 / page)32.73±21.59 (0.86±0.57 / page)
LU1009600286-LU0098995292-LU1009600955-LU0404639410_English_2012_UBS-Lux-Eq-USAMultiStrat--.pdf4025.72±0.93 (0.64±0.02 / page)69.50±0.76 (1.74±0.02 / page)75.60±0.28 (1.89±0.01 / page)31.59±0.97 (0.79±0.02 / page)50.60±22.17 (1.27±0.55 / page)
OYSTER_2005_X_P_X_X.pdf406.19±0.13 (0.15±0.00 / page)29.57±0.22 (0.74±0.01 / page)31.03±0.13 (0.78±0.00 / page)8.41±0.07 (0.21±0.00 / page)18.80±11.54 (0.47±0.29 / page)
CANDRIAM_GF_LU1220230442_2015_K_P_R_A.pdf427.11±0.04 (0.17±0.00 / page)31.56±0.23 (0.75±0.01 / page)33.32±0.10 (0.79±0.00 / page)9.30±0.05 (0.22±0.00 / page)20.32±12.16 (0.48±0.29 / page)
LU0091821107_English_2013_CarnegieGlobalHealthcareFund.pdf426.39±0.01 (0.15±0.00 / page)22.07±0.19 (0.53±0.00 / page)23.78±0.25 (0.57±0.01 / page)8.36±0.06 (0.20±0.00 / page)15.15±7.83 (0.36±0.19 / page)
LU0611173930-LU0626901861-LU0611173427_English_2011_UBS-Lux-Eq-SICAVGl-HDUSDP.pdf4241.30±1.51 (0.98±0.04 / page)832.00±4.68 (19.81±0.11 / page)847.74±8.20 (20.18±0.20 / page)52.15±0.10 (1.24±0.00 / page)443.30±396.66 (10.55±9.44 / page)
GB00B94CTF25-GB00B39R2V77-GB00B39R2T55-GB00BK6MCK32-GB00B46J9127-GB00B39R2S49-GB00B39R2W84-GB00BK6MC.pdf439.66±0.12 (0.22±0.00 / page)67.85±0.31 (1.58±0.01 / page)71.24±0.59 (1.66±0.01 / page)12.84±0.06 (0.30±0.00 / page)40.40±29.20 (0.94±0.68 / page)
PRO_NFOF_eng_LU.pdf4310.14±0.10 (0.24±0.00 / page)45.38±0.20 (1.06±0.00 / page)48.32±0.15 (1.12±0.00 / page)13.28±0.14 (0.31±0.00 / page)29.28±17.64 (0.68±0.41 / page)
LU1150262910-LU1150255971-LU1150259296_English_2015_BNPParibasIslamicFundHilalIncome.pdf448.60±0.21 (0.20±0.00 / page)48.44±0.66 (1.10±0.01 / page)50.73±0.04 (1.15±0.00 / page)11.46±0.16 (0.26±0.00 / page)29.81±19.82 (0.68±0.45 / page)
LU0073418229-LU0280479329_English_2010_BaringRussiaFundAUSD.pdf467.36±0.23 (0.16±0.01 / page)22.81±0.40 (0.50±0.01 / page)24.59±0.05 (0.53±0.00 / page)9.51±0.18 (0.21±0.00 / page)16.07±7.70 (0.35±0.17 / page)
ALGER_SICAV_LU0070176184_2012_X_P_X_X.pdf467.56±0.15 (0.16±0.00 / page)28.77±0.08 (0.63±0.00 / page)30.85±0.05 (0.67±0.00 / page)9.94±0.04 (0.22±0.00 / page)19.28±10.59 (0.42±0.23 / page)
LU0562934934-LU0289591256_English_2015_EastCapital-Lux-ChinaEnviron-.pdf479.30±0.10 (0.20±0.00 / page)29.55±0.50 (0.63±0.01 / page)31.71±0.14 (0.67±0.00 / page)12.00±0.04 (0.26±0.00 / page)20.64±10.07 (0.44±0.21 / page)
LU0346062424_English_2015_QuestCleantechBAcc.pdf486.73±0.04 (0.14±0.00 / page)23.13±0.44 (0.48±0.01 / page)24.64±0.08 (0.51±0.00 / page)8.55±0.03 (0.18±0.00 / page)15.76±8.17 (0.33±0.17 / page)
LU0145217120-LU0323243989_English_2015_ShareGold.pdf507.32±0.05 (0.15±0.00 / page)24.83±0.46 (0.50±0.01 / page)26.25±0.11 (0.53±0.00 / page)9.11±0.03 (0.18±0.00 / page)16.88±8.70 (0.34±0.17 / page)
LU0230834854-LU0486541344-LU0254675159-LU0230242686-LU0230242504_English_2015_RobecoFlex-o-.pdf508.27±0.12 (0.17±0.00 / page)34.55±0.80 (0.69±0.02 / page)35.97±0.08 (0.72±0.00 / page)10.66±0.11 (0.21±0.00 / page)22.36±12.94 (0.45±0.26 / page)
LU0144591038_English_2015_TurkisfundBondsI.pdf537.34±0.01 (0.14±0.00 / page)25.25±0.69 (0.48±0.01 / page)26.66±0.04 (0.50±0.00 / page)9.62±0.08 (0.18±0.00 / page)17.22±8.80 (0.32±0.17 / page)
LU0313364811-LU0313363508_English_2016_MultiM-AccessII-.pdf548.65±0.02 (0.16±0.00 / page)52.08±0.38 (0.96±0.01 / page)54.52±0.20 (1.01±0.00 / page)11.37±0.02 (0.21±0.00 / page)31.65±21.69 (0.59±0.40 / page)
LU0273373414_English_2010_GS-PFondsSchwellenlanderG.pdf557.92±0.08 (0.14±0.00 / page)27.93±0.10 (0.51±0.00 / page)29.55±0.06 (0.54±0.00 / page)9.76±0.05 (0.18±0.00 / page)18.79±9.99 (0.34±0.18 / page)
LU0244071956-LU0301247077-LU0301246772_English_2009_LTIF-Classic.pdf557.65±0.24 (0.14±0.00 / page)25.13±0.11 (0.46±0.00 / page)26.88±0.13 (0.49±0.00 / page)9.62±0.10 (0.17±0.00 / page)17.32±8.73 (0.31±0.16 / page)
LU0415391431-LU0415391514-LU0433847679-LU0415392249-LU0437409112-LU0513479948-LU0513479864-LU0433846606-LU0453818972-LU0631859575-LU0415391605-LU0415392751-LU0494761835-LU0453818899-LU0631859229_English_2011_BB.pdf5611.43±0.19 (0.20±0.00 / page)76.86±0.07 (1.37±0.00 / page)80.39±0.23 (1.44±0.00 / page)14.92±0.08 (0.27±0.00 / page)45.90±32.77 (0.82±0.59 / page)
BSI-Multinvest_2015_X_P_X_A.pdf5710.28±0.24 (0.18±0.00 / page)64.32±0.07 (1.13±0.00 / page)67.12±0.08 (1.18±0.00 / page)13.16±0.07 (0.23±0.00 / page)38.72±27.04 (0.68±0.47 / page)
Universe_The_CMI_Global_Network_Fund_2007_X_P_X_X.pdf588.01±0.17 (0.14±0.00 / page)34.83±0.06 (0.60±0.00 / page)36.58±0.07 (0.63±0.00 / page)9.86±0.02 (0.17±0.00 / page)22.32±13.42 (0.38±0.23 / page)
LU0066480616-LU0208183011_English_2012_ValartisRussianMarketFund.pdf587.36±0.10 (0.13±0.00 / page)25.37±0.09 (0.44±0.00 / page)27.10±0.03 (0.47±0.00 / page)9.30±0.07 (0.16±0.00 / page)17.28±9.00 (0.30±0.16 / page)
Henderson_Euroland_Fund_2012_X_P_X_A.pdf6316.30±0.46 (0.26±0.01 / page)42.19±0.20 (0.67±0.00 / page)47.49±0.10 (0.75±0.00 / page)21.59±0.21 (0.34±0.00 / page)31.90±13.22 (0.51±0.21 / page)
DNB_FUND_LU0029375739_2014_X_P_X_X.pdf659.67±0.13 (0.15±0.00 / page)36.89±0.09 (0.57±0.00 / page)38.82±0.11 (0.60±0.00 / page)11.67±0.11 (0.18±0.00 / page)24.26±13.63 (0.37±0.21 / page)
Fidelity_Active_STrategy_LU1048814831_2016_X_P_X_A.pdf6511.67±0.30 (0.18±0.00 / page)72.04±0.16 (1.11±0.00 / page)74.20±0.11 (1.14±0.00 / page)14.06±0.02 (0.22±0.00 / page)42.99±30.15 (0.66±0.46 / page)
LU0447610410-LU0931136328_English_2015_HSBCPortfoliosWorldSelect-1.pdf669.96±0.09 (0.15±0.00 / page)53.60±0.09 (0.81±0.00 / page)55.72±0.08 (0.84±0.00 / page)12.37±0.10 (0.19±0.00 / page)32.91±21.78 (0.50±0.33 / page)
SEB_SICAV_2_LU0086813762_2013_X_P_X_X.pdf678.51±0.03 (0.13±0.00 / page)69.62±0.12 (1.04±0.00 / page)71.84±0.21 (1.07±0.00 / page)10.98±0.05 (0.16±0.00 / page)40.24±30.51 (0.60±0.46 / page)
MainFirst_LU0152754726_2011_X_P_X_X.pdf678.37±0.06 (0.12±0.00 / page)36.39±0.10 (0.54±0.00 / page)38.04±0.06 (0.57±0.00 / page)10.29±0.09 (0.15±0.00 / page)23.27±13.97 (0.35±0.21 / page)
JULIUS_BAER_MULTIBOND_LU0189697427_2011_X_P_X_X.pdf6910.97±0.12 (0.16±0.00 / page)56.18±0.07 (0.81±0.00 / page)58.29±0.03 (0.84±0.00 / page)13.39±0.06 (0.19±0.00 / page)34.71±22.56 (0.50±0.33 / page)
LU0012197231-LU0372412295-LU0012197314-LU0261938939-LU0261938004-LU0372412022-LU0100838696-LU0261938426-LU0372411990_English_2011_JBGlobalConvertBd-.pdf6911.03±0.17 (0.16±0.00 / page)56.17±0.13 (0.81±0.00 / page)58.29±0.07 (0.84±0.00 / page)13.29±0.19 (0.19±0.00 / page)34.70±22.56 (0.50±0.33 / page)
LU0030165871_English_2014_SEBNordicFund.pdf738.51±0.05 (0.12±0.00 / page)41.51±0.09 (0.57±0.00 / page)43.29±0.06 (0.59±0.00 / page)10.43±0.04 (0.14±0.00 / page)25.93±16.49 (0.36±0.23 / page)
LU0212018807_English_2013_SFPCEuropeanPropertySecurities.pdf7410.64±0.10 (0.14±0.00 / page)48.30±0.08 (0.65±0.00 / page)50.26±0.08 (0.68±0.00 / page)13.10±0.06 (0.18±0.00 / page)30.57±18.74 (0.41±0.25 / page)
Prospectus AXA IM Cash en_FCP.pdf759.59±0.12 (0.13±0.00 / page)36.29±0.13 (0.48±0.00 / page)38.12±0.07 (0.51±0.00 / page)11.63±0.02 (0.16±0.00 / page)23.90±13.33 (0.32±0.18 / page)
BE0946843266-BE0947250453-BE0133741752-BE0175280016-BE0175279976-BE0946844272-BE0175717504-BE0175479.pdf769.39±0.03 (0.12±0.00 / page)91.41±0.17 (1.20±0.00 / page)93.36±0.14 (1.23±0.00 / page)11.83±0.01 (0.16±0.00 / page)51.50±40.90 (0.68±0.54 / page)
LU0026740844-LU0026740760-LU0099389313_English_2011_JBEuro-FocusEUR.pdf7712.95±0.04 (0.17±0.00 / page)61.40±0.11 (0.80±0.00 / page)64.12±0.08 (0.83±0.00 / page)15.64±0.13 (0.20±0.00 / page)38.53±24.27 (0.50±0.32 / page)
LU0529497777-LU0529498072-LU0529497934-LU0529497850-LU0529497694-LU0529498239-LU0529498155-LU0529498742-LU0529497421-LU0529498825-LU0529498668_English_2011_JBAbso-ReturnEuro-Eq-.pdf7813.60±0.14 (0.17±0.00 / page)62.68±0.15 (0.80±0.00 / page)65.44±0.08 (0.84±0.00 / page)16.33±0.04 (0.21±0.00 / page)39.51±24.59 (0.51±0.32 / page)
BDLCM-Funds-Prospectus-VISA-28042020.pdf7912.40±0.11 (0.16±0.00 / page)118.46±0.33 (1.50±0.00 / page)121.09±0.07 (1.53±0.00 / page)15.45±0.06 (0.20±0.00 / page)66.85±52.95 (0.85±0.67 / page)
1.9.900555.pdf8214.74±0.17 (0.18±0.00 / page)57.43±0.48 (0.70±0.01 / page)60.15±0.08 (0.73±0.00 / page)17.85±0.11 (0.22±0.00 / page)37.54±21.30 (0.46±0.26 / page)
LU0302081707-LU0267827326-LU0363285338-LU0494360927-LU0419264733-LU0232040708-LU0363285411-LU0363286658-LU0363286732-LU0363285767-LU0302080998-LU0232043801-LU0232043124-LU0267829611-LU0494361065_English_2013_WIOF.pdf8410.84±0.03 (0.13±0.00 / page)117.39±0.63 (1.40±0.01 / page)118.98±0.18 (1.42±0.00 / page)13.49±0.08 (0.16±0.00 / page)65.17±53.02 (0.78±0.63 / page)
MARCH_INTERNATIONAL_2015_X_P_X_X.pdf8511.28±0.02 (0.13±0.00 / page)49.33±0.66 (0.58±0.01 / page)51.06±0.06 (0.60±0.00 / page)13.58±0.03 (0.16±0.00 / page)31.31±18.91 (0.37±0.22 / page)
GB00BR4R5445-GB00BR4R5551-GB0033772624_English_2016_DimensionalEm.pdf8511.83±0.18 (0.14±0.00 / page)123.81±1.81 (1.46±0.02 / page)125.51±0.36 (1.48±0.00 / page)14.69±0.07 (0.17±0.00 / page)68.96±55.72 (0.81±0.66 / page)
LU0028051117_English_2014_CMIUKEquityFundDC2.pdf8710.95±0.14 (0.13±0.00 / page)45.25±0.36 (0.52±0.00 / page)46.93±0.11 (0.54±0.00 / page)12.74±0.06 (0.15±0.00 / page)28.97±17.14 (0.33±0.20 / page)
LU0146081418-LU0028047438-LU0129306311_English_2014_CMIUSEnhancedEquityDC.pdf8710.90±0.19 (0.13±0.00 / page)45.67±0.95 (0.52±0.01 / page)46.99±0.23 (0.54±0.00 / page)12.81±0.10 (0.15±0.00 / page)29.09±17.26 (0.33±0.20 / page)
Arabesque_SICAV_LU1023698662_2016_X_P_X_A.pdf8711.61±0.26 (0.13±0.00 / page)96.20±0.57 (1.11±0.01 / page)98.38±0.17 (1.13±0.00 / page)14.17±0.07 (0.16±0.00 / page)55.09±42.22 (0.63±0.49 / page)
LU1151059737-LU1151057954_English_2015_RICHELIEUB--Richelieu2020.pdf8913.42±0.24 (0.15±0.00 / page)124.81±0.28 (1.40±0.00 / page)127.68±0.09 (1.43±0.00 / page)16.38±0.04 (0.18±0.00 / page)70.57±55.69 (0.79±0.63 / page)
LU0477234263-LU0466397824_English_2015_R.pdf8913.40±0.21 (0.15±0.00 / page)124.77±0.32 (1.40±0.00 / page)127.57±0.28 (1.43±0.00 / page)16.54±0.05 (0.19±0.00 / page)70.57±55.62 (0.79±0.62 / page)
LU1139920265-LU1125674611-LU1125674538-LU1125674967-LU1125674454-LU1125674702_English_2015_EastCapital-Lux-FrontierM.pdf9214.09±0.13 (0.15±0.00 / page)118.30±0.58 (1.29±0.01 / page)120.39±0.18 (1.31±0.00 / page)17.11±0.11 (0.19±0.00 / page)67.48±51.89 (0.73±0.56 / page)
MAGALLANES_VALUE_INVESTORS_UCITS_LU1330191542_2016_X_P_X_X.pdf9310.52±0.16 (0.11±0.00 / page)51.99±0.24 (0.56±0.00 / page)53.63±0.11 (0.58±0.00 / page)12.49±0.11 (0.13±0.00 / page)32.16±20.67 (0.35±0.22 / page)
DEMOGRAPHIC_CHANGE_2018_X_P_X_X.pdf9511.88±0.22 (0.13±0.00 / page)48.95±0.21 (0.52±0.00 / page)50.97±0.16 (0.54±0.00 / page)13.97±0.08 (0.15±0.00 / page)31.44±18.55 (0.33±0.20 / page)
UNK_English_UNK_LFIS-Vision-UCITS.pdf9711.34±0.05 (0.12±0.00 / page)57.55±0.35 (0.59±0.00 / page)59.22±0.10 (0.61±0.00 / page)13.37±0.04 (0.14±0.00 / page)35.37±23.03 (0.36±0.24 / page)
average (per page)0.19±0.131.14±2.431.19±2.480.24±0.160.69±1.80
+
+
+7 FinTOC 2022 (en, 100-500 pages) (21 files) + + + + + + + + + + + + + + + + + + + + + + + + + +
FilenamePagespdf_with_text_layer
tabbytrueautoauto_tabbyaverage
Dexia_Equities_L_2011_X_P_X_X.pdf10010.98±0.09 (0.11±0.00 / page)69.44±0.06 (0.69±0.00 / page)71.54±0.15 (0.72±0.00 / page)13.23±0.09 (0.13±0.00 / page)41.30±29.21 (0.41±0.29 / page)
Lombard_Odier_Funds_2014_X_P_X_X.pdf10415.49±0.04 (0.15±0.00 / page)101.54±0.07 (0.98±0.00 / page)104.63±0.25 (1.01±0.00 / page)18.61±0.14 (0.18±0.00 / page)60.07±43.04 (0.58±0.41 / page)
LU0881817786-LU0881818081-LU0881817430-LU0881817190_English_2014_OddoBondsHighYieldEurope.pdf10713.42±0.04 (0.13±0.00 / page)254.76±0.21 (2.38±0.00 / page)258.69±0.44 (2.42±0.00 / page)16.88±0.19 (0.16±0.00 / page)135.94±120.80 (1.27±1.13 / page)
LU0800341645-LU0800341132-LU0800341991-LU0800341215-LU0800341058-LU0800341488-LU0800341306_English_2012_FranklinBrazilOpportunities.pdf11017.99±0.08 (0.16±0.00 / page)97.31±0.10 (0.88±0.00 / page)101.53±0.17 (0.92±0.00 / page)21.57±0.16 (0.20±0.00 / page)59.60±39.87 (0.54±0.36 / page)
Prospectus-2016-02-01.pdf11013.12±0.13 (0.12±0.00 / page)78.04±0.17 (0.71±0.00 / page)80.75±0.08 (0.73±0.00 / page)15.37±0.07 (0.14±0.00 / page)46.82±32.60 (0.43±0.30 / page)
LU1252823262-LU0482498846-LU0482498762-LU0955861710-LU0955867758-LU0955867915-LU0432616810-LU0607521506-LU0955867832-LU0482498176-LU0955861983-LU0955861801-LU0432616901_English_2013_InvescoBalanced-RiskAlloc-.pdf11318.53±0.06 (0.16±0.00 / page)88.26±0.28 (0.78±0.00 / page)92.13±0.34 (0.82±0.00 / page)21.98±0.15 (0.19±0.00 / page)55.23±35.02 (0.49±0.31 / page)
LU0949250459-LU0645132902-LU0229041164-LU0390138864-LU0188151251-LU0543370943-LU0195951883-LU0152904719-LU0188151095-LU0543370513-LU0889566138-LU0229948244-LU0229948087-LU0122613572-LU0229949648_English_2012_Franklin.pdf11622.53±0.59 (0.19±0.01 / page)306.84±0.41 (2.65±0.00 / page)314.77±1.15 (2.71±0.01 / page)28.40±0.08 (0.24±0.00 / page)168.13±142.71 (1.45±1.23 / page)
LU1057354992_English_2016_EchiquierEuropeanBondsAEUR.pdf11814.77±0.09 (0.13±0.00 / page)96.58±0.11 (0.82±0.00 / page)99.78±0.10 (0.85±0.00 / page)17.49±0.24 (0.15±0.00 / page)57.15±41.05 (0.48±0.35 / page)
LU0641972152-LU0641972079_English_2015_DBPWMIGlobalAllocationTracker-.pdf12114.30±0.03 (0.12±0.00 / page)485.43±0.30 (4.01±0.00 / page)488.98±0.44 (4.04±0.00 / page)17.63±0.10 (0.15±0.00 / page)251.58±235.63 (2.08±1.95 / page)
FU_BF097_EN_2019-05-13_a5585d06-b4df-4b1e-bf30-3c1b5615cf74.pdf12215.64±0.04 (0.13±0.00 / page)125.95±0.16 (1.03±0.00 / page)129.33±0.20 (1.06±0.00 / page)18.40±0.02 (0.15±0.00 / page)72.33±55.33 (0.59±0.45 / page)
LU0705072691-LU0705072345-LU0705072188_English_2015_RAM-LUX-Long-Sh-EmergingMarktesEq-.pdf12519.43±0.11 (0.16±0.00 / page)321.80±0.30 (2.57±0.00 / page)328.51±0.47 (2.63±0.00 / page)24.03±0.35 (0.19±0.00 / page)173.44±151.74 (1.39±1.21 / page)
LU0309082104-LU0309082799-LU0309082369_English_2015_DNCAInvest-Infrastructures.pdf13414.62±0.06 (0.11±0.00 / page)219.02±0.44 (1.63±0.00 / page)223.57±0.30 (1.67±0.00 / page)17.44±0.10 (0.13±0.00 / page)118.66±102.65 (0.89±0.77 / page)
LU0462973008-LU0512124362_English_2015_DNCAInvestMiura.pdf13414.52±0.03 (0.11±0.00 / page)218.86±0.45 (1.63±0.00 / page)223.51±0.23 (1.67±0.00 / page)17.49±0.09 (0.13±0.00 / page)118.60±102.61 (0.89±0.77 / page)
LU0375979613-LU0375979290_English_2015_GISDyn-ControlPFCo.pdf13618.45±0.02 (0.14±0.00 / page)222.75±0.19 (1.64±0.00 / page)227.47±0.23 (1.67±0.00 / page)22.13±0.09 (0.16±0.00 / page)122.70±102.43 (0.90±0.75 / page)
LU0289452210_English_2015_DBPWMIIGISUSEquityPortfolioB.pdf13618.27±0.35 (0.13±0.00 / page)83.28±0.08 (0.61±0.00 / page)87.35±0.07 (0.64±0.00 / page)21.63±0.12 (0.16±0.00 / page)52.63±32.73 (0.39±0.24 / page)
LU0424369923-LU0424369766-LU0114314536-LU0063949068-LU0686792812-LU0061927850-LU0686794354_English_2013_ManConvertibles.pdf14721.73±0.03 (0.15±0.00 / page)496.84±0.96 (3.38±0.01 / page)504.14±0.97 (3.43±0.01 / page)26.09±0.17 (0.18±0.00 / page)262.20±238.31 (1.78±1.62 / page)
LU0575375588-LU0493852429-LU0261074230-LU0640453774-LU0860716223-LU0575374698-LU0493865678-LU0860715415-LU0493851454-LU0160485420-LU0493867534-LU0860716140-LU0953070868-LU0956110364-LU0688432862_English_2016_AshmoreS.pdf18121.72±0.16 (0.12±0.00 / page)137.00±0.14 (0.76±0.00 / page)140.50±0.07 (0.78±0.00 / page)24.88±0.06 (0.14±0.00 / page)81.02±57.75 (0.45±0.32 / page)
HSBC_Global_Investment_Funds_2017_X_P_X_A.pdf25032.34±0.19 (0.13±0.00 / page)246.00±0.49 (0.98±0.00 / page)250.83±0.46 (1.00±0.00 / page)36.65±0.21 (0.15±0.00 / page)141.46±106.98 (0.57±0.43 / page)
LU0178440839-LU0178439401-LU0178439310-LU0178439666_English_2012_AllianzBestSty-Eu-Eq-.pdf28534.42±0.19 (0.12±0.00 / page)256.92±0.54 (0.90±0.00 / page)261.52±0.22 (0.92±0.00 / page)36.69±0.19 (0.13±0.00 / page)147.39±111.85 (0.52±0.39 / page)
LU0589944569-LU0348788117-LU1156968403-LU1254141333-LU0348791418_English_2011_AllianzEm-AsiaEq-.pdf28533.59±0.59 (0.12±0.00 / page)256.60±0.34 (0.90±0.00 / page)261.08±0.47 (0.92±0.00 / page)37.69±0.27 (0.13±0.00 / page)147.24±111.62 (0.52±0.39 / page)
LU0734574329-LU0734574162-LU0333227550-LU0333226230-LU0571576585-LU1039626509-LU0333227394-LU0333226826-LU0734574246-LU0333227048_English_2015_ML.pdf40549.15±0.53 (0.12±0.00 / page)2917.33±9.87 (7.20±0.02 / page)2930.59±2.55 (7.24±0.01 / page)56.62±0.52 (0.14±0.00 / page)1488.42±1435.56 (3.68±3.54 / page)
average (per page)0.13±0.021.77±1.541.80±1.540.16±0.030.97±1.36
+
+ + diff --git a/scripts/benchmark_pdf_performance.py b/scripts/benchmark_pdf_performance.py new file mode 100644 index 00000000..c3fa48af --- /dev/null +++ b/scripts/benchmark_pdf_performance.py @@ -0,0 +1,155 @@ +import argparse +import json +import os.path +import zipfile +from typing import List + +import wget + +from scripts.benchmark_utils.pdf_performance_task import PDFPerformanceTask + + +def download_data(data_path: str) -> None: + data_archive_path = f"{data_path}.zip" + + wget.download("https://at.ispras.ru/owncloud/index.php/s/lp4wEVyZTd9lA0u/download", data_archive_path) + with zipfile.ZipFile(data_archive_path, "r") as archive: + archive.extractall(data_path) + + os.remove(data_archive_path) + + +def get_tasks(configs: List[dict], input_path: str, dedoc_host: str, pdf_options: List[str]) -> List[List[PDFPerformanceTask]]: + if input_path == "": + input_path = "pdf_performance_benchmark_data" + download_data(input_path) + + tasks = [] + + for config in configs: + config_tasks = [] + + for task_name in sorted(os.listdir(input_path)): + files_path = os.path.join(input_path, task_name) + if os.path.isdir(files_path) and not task_name.startswith("_"): + config_tasks.append(PDFPerformanceTask(dedoc_host, task_name, files_path, pdf_options, config)) + + tasks.append(config_tasks) + + return tasks + + +def make_report(tasks: List[List[PDFPerformanceTask]], output_path: str, configs: List[dict]) -> None: + with open(output_path, "w", encoding="utf-8") as f: + f.write(""" + + PDF performance benchmark + + + + + """) + + for config, config_tasks in zip(configs, tasks): + f.write("

Running parameters:

") + f.write(f"
{json.dumps(config, ensure_ascii=False, indent=2)}
\n\n") + + for task in config_tasks: + f.write(task.to_html()) + + f.write("\n") + f.write("\n") + + +def main() -> None: + default_output_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "resources", "benchmarks", "benchmark_pdf_performance.html")) + pdf_options = ["true", "false", "auto", "auto_tabby", "tabby"] + + parser = argparse.ArgumentParser(description="Script for evaluate different PDF readers performance.", formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument("-i", "--input", help="path to the directory with pdfs (default: %(default)s)", type=str, default="") + parser.add_argument("-o", "--output", help="path to the report filename (default: %(default)s)", type=str, default=default_output_path) + parser.add_argument("-n", "--loops", help="number of repetitions of testing one file (default: %(default)d)", type=int, default=1) + parser.add_argument("--dedoc-host", help="url to DEDOC instance for sending files (default: %(default)s", type=str, default="http://localhost:1231") + parser.add_argument("--pdf-options", help="values of pdf_with_text_layer argument", choices=pdf_options, nargs="+", default=pdf_options) + parser.add_argument("--parameters", help="path to json file with alternative parameters dictionaries") + args = parser.parse_args() + + if args.input != "": + assert os.path.exists(args.input), f'Directory "{args.input}" does not exists' + assert os.path.isdir(args.input), f'Path "{args.input}" is not a directory' + + assert args.loops > 0, "The number of repetitions of testing one file must be positive" + + print(f'Run pdf performance benchmark with next pdf options: {", ".join(args.pdf_options)}') + configs = [{}] + + if args.parameters: + with open(args.parameters, "r", encoding="utf-8") as f: + configs = json.load(f) + + tasks = get_tasks(configs, args.input, args.dedoc_host, args.pdf_options) + + for _ in range(args.loops): + for config_tasks in tasks: + for task in config_tasks: + task.run() + 
make_report(tasks, args.output, configs) + + +""" +How to run on default benchmark data? +Simple run next command: + python3 benchmark_pdf_performance.py + +Running on custom data: +1. Prepare folder with tasks. The task is a directory with pdf files. Directories starting with an underscore (_) will be ignored. +Example of a folder "pdf_data" with 3 tasks: + pdf_data + +--+--+ task1 + | +--- file1.pdf + | +--- file2.pdf + | + +--+ Some second task name + | +--- f.pdf + | + +--+ And last task name + | +--- file_.pdf + | +--- file2.pdf + | +--- not_pdf_file.docx + | + +--+ _ignored folder + +--- some_image.png + +--- some_pdf.pdf + +2. Run script with next command: + python3 benchmark_pdf_performance.py --pdf-options tabby true auto auto_tabby -i pdf_data + +2*. To evaluate with different parameters, you can prepare a json file with a list of dictionaries and specify the “parameters” option: + parameters.json: + [ + { "need_pdf_table_analysis": "false" }, + { "need_pdf_table_analysis": "true", "return_format": "plain_text" } + ] + +Run with next command: + python3 benchmark_pdf_performance.py --pdf-options tabby true auto auto_tabby -i pdf_data --parameters parameters.json + +3. Look your results in the pdf_performance.html file +""" +if __name__ == "__main__": + main() diff --git a/scripts/benchmark_utils/pdf_performance_task.py b/scripts/benchmark_utils/pdf_performance_task.py new file mode 100644 index 00000000..b82f9f7f --- /dev/null +++ b/scripts/benchmark_utils/pdf_performance_task.py @@ -0,0 +1,94 @@ +import os +import time +from typing import List + +from pdfminer.pdfpage import PDFPage + +from dedoc.utils.pdf_utils import get_pdf_page_count +from dedoc.utils.utils import send_file +from scripts.benchmark_utils.performance_result import PerformanceResult + + +class PDFPerformanceTask: + """ + This class is used to estimate the elapsed time of different PDF pipelines + in different PDF files and save the information into an html table. 
+ """ + + def __init__(self, dedoc_host: str, title: str, input_dir: str, pdf_reader_options: List[str], config: dict) -> None: + """ + Initialization of task + + :param dedoc_host: URL to launch the dedoc API instance, for example http://localhost:1231 + :param title: title of the task to display in the html report + :param input_dir: path to the directory containing the PDF files. + :param pdf_reader_options: list of options available for the "pdf_with_text_layer" API parameter + :param config: additional file processing parameters + """ + self.dedoc_host = dedoc_host + self.title = title + self.config = config + self.pdf_reader_options = pdf_reader_options + + filenames = [os.path.join(input_dir, filename) for filename in os.listdir(input_dir) if filename.endswith(".pdf")] + self.times = {pdf_option: {filename: PerformanceResult() for filename in filenames} for pdf_option in self.pdf_reader_options} + self.pages = {filename: get_pdf_page_count(filename) for filename in filenames} + self.filenames = sorted(filenames, key=lambda filename: self.pages[filename]) + + def run(self) -> None: + print(f'Run task "{self.title}"') + + for pdf_option in self.pdf_reader_options: + print(f' Handle files with pdf option "{pdf_option}":') + self.__run_files(pdf_option) + + def to_html(self) -> str: + if not self.filenames: + return "" + + pdf_header = "".join(f"{pdf_option}" for pdf_option in self.pdf_reader_options) + + html = [ + "
", + f"{self.title} ({len(self.filenames)} files)", "", + f'', + f"{pdf_header}" + ] + + for filename in self.filenames: + times = [self.times[pdf_option][filename] for pdf_option in self.pdf_reader_options] + pages = self.pages[filename] + html.append(f"{self.__get_performance_cells(times, pages)}") + + times = [] + for pdf_option in self.pdf_reader_options: + times.append(PerformanceResult([self.times[pdf_option][filename] / self.pages[filename] for filename in self.filenames])) + + html.append(f'{self.__get_performance_cells(times)}') + html.append("
FilenamePagespdf_with_text_layer
average
{os.path.basename(filename)}{pages}
average (per page)
") + html.append("
\n") + + return "\n".join(html) + + def __run_file(self, pdf_option: str, filename: str) -> float: + start_time = time.time() + send_file(self.dedoc_host, os.path.basename(filename), filename, {"pdf_with_text_layer": pdf_option, **self.config}) + return time.time() - start_time + + def __run_files(self, pdf_option: str) -> None: + for i, filename in enumerate(self.filenames): + elapsed_time = self.__run_file(pdf_option, filename) + self.times[pdf_option][filename].add(elapsed_time) + print(f' - handle file {i + 1} / {len(self.filenames)} "{os.path.basename(filename)}" (pages: {self.pages[filename]}): {elapsed_time} seconds') + + print("") + + def __get_performance_cells(self, pdf_times: List[PerformanceResult], pages: int = 0) -> str: + total_times = pdf_times + [PerformanceResult(pdf_times)] + return "".join(f"{times} ({times / pages} / page)" if pages > 0 else f"{times}" for times in total_times) + + def __get_page_count(self, path: str) -> int: + with open(path, "rb") as fp: + pages = len(list(PDFPage.get_pages(fp))) + + return max(pages, 1) diff --git a/scripts/benchmark_utils/performance_result.py b/scripts/benchmark_utils/performance_result.py new file mode 100644 index 00000000..93b7db8c --- /dev/null +++ b/scripts/benchmark_utils/performance_result.py @@ -0,0 +1,59 @@ +from typing import Iterable, Optional, Union + +import numpy as np + + +class PerformanceResult: + """ + This class is used for storing multiple results of measuring some metric (for example, elapsed time) + with support for calculating mean and std statistics and pretty printing of stored values + + >>> result = PerformanceResult() + >>> f"result: {result}" # result: - + >>> result.add(5.0) + >>> f"result: {result}" # result: 5.00 + >>> result.add(8.0) + >>> f"result: {result}" # result: 6.50±1.50 + >>> result.mean # 6.5 + >>> result.std # 1.5 + >>> partial_result = result / 4 + >>> f"partial_result: {partial_result}" # partial_result: 1.62±0.38 + """ + + def __init__(self, results: 
Optional[Iterable["PerformanceResult"]] = None) -> None: + self.values = [] + + if results is not None: + for result in results: + self.add(result) + + def add(self, value: Union[float, "PerformanceResult"]) -> None: + if isinstance(value, PerformanceResult): + self.values.extend(value.values) + else: + self.values.append(value) + + @property + def mean(self) -> float: + return np.mean(self.values) if self.values else 0 + + @property + def std(self) -> float: + return np.std(self.values) if self.values else 0 + + def __str__(self) -> str: + if not self.values: + return "-" + + if len(self.values) == 1: + return f"{self.mean:.2f}" + + return f"{self.mean:.2f}±{self.std:.2f}" + + def __truediv__(self, scale: float) -> "PerformanceResult": + result = PerformanceResult() + + for t in self.values: + result.add(t / scale) + + return result diff --git a/tests/api_tests/test_api_doctype_article.py b/tests/api_tests/test_api_doctype_article.py new file mode 100644 index 00000000..508e2574 --- /dev/null +++ b/tests/api_tests/test_api_doctype_article.py @@ -0,0 +1,72 @@ +from tests.api_tests.abstract_api_test import AbstractTestApiDocReader + + +class TestApiArticle(AbstractTestApiDocReader): + + def test_article(self) -> None: + file_name = "pdf_with_text_layer/article.pdf" + result = self._send_request(file_name, dict(document_type="article")) + self.assertEqual(result["warnings"], ["use GROBID (version: 0.8.0)"]) + + tree = result["content"]["structure"] + self._check_tree_sanity(tree) + + # author 1 info + self.assertEqual("author", self._get_by_tree_path(tree, "0.0")["metadata"]["paragraph_type"]) + self.assertEqual("author_first_name", self._get_by_tree_path(tree, "0.0.0")["metadata"]["paragraph_type"]) # author 1 first name + self.assertEqual("Sonia", self._get_by_tree_path(tree, "0.0.0")["text"]) + self.assertEqual("author_surname", self._get_by_tree_path(tree, "0.0.1")["metadata"]["paragraph_type"]) # author 1 second name + self.assertEqual("Belaïd", 
self._get_by_tree_path(tree, "0.0.1")["text"]) + self.assertEqual("author_affiliation", self._get_by_tree_path(tree, "0.0.2")["metadata"]["paragraph_type"]) # the first affiliation of author 1 + self.assertEqual("org_name", self._get_by_tree_path(tree, "0.0.2.0")["metadata"]["paragraph_type"]) + self.assertEqual("École Normale Supérieure", self._get_by_tree_path(tree, "0.0.2.0")["text"]) + self.assertEqual("author_affiliation", self._get_by_tree_path(tree, "0.0.3")["metadata"]["paragraph_type"]) # the second affiliation of author 1 + self.assertEqual("org_name", self._get_by_tree_path(tree, "0.0.3.0")["metadata"]["paragraph_type"]) + self.assertEqual("Thales Communications & Security", self._get_by_tree_path(tree, "0.0.3.0")["text"]) + + # author 3 info + self.assertEqual("author", self._get_by_tree_path(tree, "0.2")["metadata"]["paragraph_type"]) + self.assertEqual("author_first_name", self._get_by_tree_path(tree, "0.2.0")["metadata"]["paragraph_type"]) # author 3 first name + self.assertEqual("François", self._get_by_tree_path(tree, "0.2.0")["text"]) + self.assertEqual("author_surname", self._get_by_tree_path(tree, "0.2.1")["metadata"]["paragraph_type"]) # author 3 second name + self.assertEqual("Xavier-Standaert", self._get_by_tree_path(tree, "0.2.1")["text"]) + self.assertEqual("author_affiliation", self._get_by_tree_path(tree, "0.2.2")["metadata"]["paragraph_type"]) # the first affiliation of author 3 + self.assertEqual("org_name", self._get_by_tree_path(tree, "0.2.2.0")["metadata"]["paragraph_type"]) + self.assertEqual("ICTEAM/ELEN/Crypto Group", self._get_by_tree_path(tree, "0.2.2.0")["text"]) + + # check bibliography list + self.assertEqual("bibliography", self._get_by_tree_path(tree, "0.20")["metadata"]["paragraph_type"]) + self.assertEqual(65, len(self._get_by_tree_path(tree, "0.20")["subparagraphs"])) + + # check bib_item 1 recognizing + self.assertEqual("title", self._get_by_tree_path(tree, "0.20.0.0")["metadata"]["paragraph_type"]) + 
self.assertEqual("Leakage-resilient symmetric encryption via re-keying", self._get_by_tree_path(tree, "0.20.0.0")["text"]) + self.assertEqual("title_conference_proceedings", self._get_by_tree_path(tree, "0.20.0.1")["metadata"]["paragraph_type"]) + self.assertEqual("Bertoni and Coron", self._get_by_tree_path(tree, "0.20.0.1")["text"]) + self.assertEqual("author", self._get_by_tree_path(tree, "0.20.0.2")["metadata"]["paragraph_type"]) # author 1 + self.assertEqual("\nMichelAbdalla\n", self._get_by_tree_path(tree, "0.20.0.2")["text"]) + self.assertEqual("biblScope_volume", self._get_by_tree_path(tree, "0.20.0.5")["metadata"]["paragraph_type"]) # author 1 + self.assertEqual("4", self._get_by_tree_path(tree, "0.20.0.5")["text"]) + self.assertEqual("biblScope_page", self._get_by_tree_path(tree, "0.20.0.6")["metadata"]["paragraph_type"]) # author 1 + self.assertEqual("471-488", self._get_by_tree_path(tree, "0.20.0.6")["text"]) + + # check cite on bib_item + bibliography_item_uuid = self._get_by_tree_path(tree, "0.20.57")["metadata"]["uid"] # checking on [58] references + section = self._get_by_tree_path(tree, "0.4.0") + bibliography_refs_in_text = [ann for ann in section["annotations"] if ann["name"] == "reference" and ann["value"] == bibliography_item_uuid] + # We must found two refs [58] in Introduction section + self.assertEqual(len(bibliography_refs_in_text), 2) + self.assertEqual(["58,", "58,"], [section["text"][bibliography_refs_in_text[n]["start"]:bibliography_refs_in_text[n]["end"]] for n in range(2)]) + + # check tables + self.assertEqual(len(result["content"]["tables"]), 2) + table = result["content"]["tables"][0] + self.assertEqual(table["metadata"]["title"], "Table 1 .Performance of some illustrative AES implementations.") + self.assertEqual(self._get_text_of_row(table["cells"][0]), ["Software (8-bit)", "code size", "cycle", "cost", "physical"]) + section_with_table_refs = self._get_by_tree_path(tree, "0.7.0") + table_refs_in_text = [ann for ann in 
section_with_table_refs["annotations"] if ann["name"] == "table" and ann["value"] == table["metadata"]["uid"]] + self.assertEqual(len(table_refs_in_text), 2) + self.assertEqual(["1", "1"], [section_with_table_refs["text"][table_refs_in_text[n]["start"]:table_refs_in_text[n]["end"]] for n in range(2)]) + + table = result["content"]["tables"][1] # Grobid can't recognize vertical orientation tables + self.assertEqual(table["metadata"]["title"], "Table 2 .List of our target implementations.") diff --git a/tests/data/pdf_with_text_layer/article.pdf b/tests/data/pdf_with_text_layer/article.pdf new file mode 100644 index 00000000..6c74f192 Binary files /dev/null and b/tests/data/pdf_with_text_layer/article.pdf differ