Skip to main content

Recognition result specification (JSON)

RecognitionResult

Root element of the recognition result

| property | type | required | description |
| --- | --- | --- | --- |
| resultSchemaVersion | string | yes | Recognition result schema version in major.minor.patch format, where major, minor and patch are non-negative integers. |
| dataFieldResults | map(string,DataFieldResult) | no | Results map where the key is the name of a data field and the value is its result. |

RecognitionResult json example

Click to expand json
{
"resultSchemaVersion": "1.0.0",
"dataFieldResults": {
"M1": <DataFieldResult>,
...,
"MN": <DataFieldResult>
}
}

DataFieldResult

| property | type | required | description |
| --- | --- | --- | --- |
| dataType | string | no | The type of the data field as it is specified in the template. |
| results | list(ResultValue) | no | The list of recognition results, each of which can be one of several types: TEXT, IMAGE, TABLE, GROUP. |

DataFieldResult json example

Click to expand json
 {
"dataType": "root",
"results": [
<ResultValue>,
...,
<ResultValue>
]
}

ResultValue

TextResult

| property | type | required | description |
| --- | --- | --- | --- |
| resultType | string | yes | The type of value result. Always TEXT. |
| content | string | yes | Extracted text data. |
| pageLocationMeta | PageLocationMeta | no | Describes the location of the result within the PDF file. |
| fontMeta | FontMeta | no | Contains information about the font of the result content. |

TextResult json without meta example

Click to expand json
{
"resultType": "TEXT",
"content": "st nd"
}

TextResult json with meta example

Click to expand json
{
"resultType": "TEXT",
"pageLocationMeta": <PageLocationMeta>,
"fontMeta": <FontMeta>,
"content": "st nd"
}

ImageResult

| property | type | required | description |
| --- | --- | --- | --- |
| resultType | string | yes | The type of value result. Always IMAGE. |
| base64 | string | yes | The extracted image bytes encoded as a base64 string. |
| pageLocationMeta | PageLocationMeta | no | Describes the location of the result within the PDF file. |

ImageResult json without meta example

Click to expand json
{
"resultType": "IMAGE",
"base64": "abcdefghijk"
}

ImageResult json with meta example

Click to expand json
{
"resultType": "IMAGE",
"pageLocationMeta": <PageLocationMeta>,
"base64": "abcdefghijk"
}

TableResult

| property | type | required | description |
| --- | --- | --- | --- |
| resultType | string | yes | The type of value result. Always TABLE. |
| rows | list(TableRowResult) | no | The list of table row results. |
| pageLocationMetas | list(PageLocationMeta) | no | Describes the locations of the result within the PDF file; contains multiple values when the table spans several pages. |

TableResult without meta json example

Click to expand json
{
"resultType": "TABLE",
"rows": [
<TableRowResult>,
...,
<TableRowResult>
]
}

TableResult with meta json example

Click to expand json
{
"resultType": "TABLE",
"pageLocationMetas": [
<PageLocationMeta>,
...,
<PageLocationMeta>
],
"rows": [
<TableRowResult>,
...,
<TableRowResult>
]
}

TableRowResult

| property | type | required | description |
| --- | --- | --- | --- |
| resultType | string | yes | The type of value result. Always TABLE_ROW. |
| cells | list(TableCellResult) | no | The list of table cells in the row. |
| pageLocationMeta | PageLocationMeta | no | Describes the location of the result within the PDF file. |

TableRowResult json without meta example

Click to expand json
{
"resultType": "TABLE_ROW",
"cells": [
<TableCellResult>,
...,
<TableCellResult>
]
}

TableRowResult json with meta example

Click to expand json
{
"resultType": "TABLE_ROW",
"pageLocationMeta": <PageLocationMeta>,
"cells": [
<TableCellResult>,
...,
<TableCellResult>
]
}

TableCellResult

| property | type | required | description |
| --- | --- | --- | --- |
| resultType | string | yes | The type of value result. Always TABLE_CELL. |
| content | string | yes | Text data extracted from the cell. |
| pageLocationMeta | PageLocationMeta | no | Describes the location of the result within the PDF file. |
| fontMeta | FontMeta | no | Contains information about the font of the result content. |
| rowspan | int | no | Specifies the number of rows a cell should span. |
| colspan | int | no | Specifies the number of columns a cell should span. |

TableCellResult without meta json example

Click to expand json
{
"resultType": "TABLE_CELL",
"content": "Key"
}

TableCellResult with meta json example

Click to expand json
{
"resultType": "TABLE_CELL",
"pageLocationMeta": <PageLocationMeta>,
"fontMeta": <FontMeta>,
"content": "Key"
}

GroupResult

| property | type | required | description |
| --- | --- | --- | --- |
| resultType | string | yes | The type of value result. Always GROUP. |
| entries | map(string,GroupEntryResult) | no | Map of grouped result entries, where the key is the name of the nested data field and the value is its result. |

GroupResult json example

Click to expand json
{
"resultType": "GROUP",
"entries": {
"GroupEntry1": <GroupEntryResult>,
...,
"GroupEntryN": <GroupEntryResult>
}
}

GroupEntryResult

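| property | type | required | description |
| --- | --- | --- | --- |
| resultType | string | yes | The type of value result. Always GROUP_ENTRY. |
| dataType | string | no | The type of the nested data field as it is specified in the template. |
| results | list(ResultValue) | no | The list of recognition results, each of which can be one of several types: TEXT, IMAGE, TABLE, GROUP. |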

GroupEntryResult json example

Click to expand json
{
"resultType": "GROUP_ENTRY",
"dataType": "dataType",
"results": [
<ResultValue>,
...,
<ResultValue>
]
}

Meta

PageLocationMeta

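| property | type | required | description |
| --- | --- | --- | --- |
| x | double | no | The x coordinate on the page. |
| y | double | no | The y coordinate on the page. |
| width | double | no | The width of the location. |
| height | double | no | The height of the location. |
| page | int | no | The page number. |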

PageLocationMeta json example

Click to expand json
 {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
}

FontMeta

| property | type | required | description |
| --- | --- | --- | --- |
| fontName | string | no | The font name. |
| fontStyle | string | no | The font style. Possible values: NORMAL, BOLD, ITALIC, BOLD_ITALIC. |
| fontColor | string | no | The font color. The format is rrggbb, where rr, gg and bb are the hexadecimal representations of the corresponding color components. |

FontMeta json example

Click to expand json
 {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
}

Complete example

RecognitionResult without meta json example

Click to expand json
{
"resultSchemaVersion": "1.0.0",
"dataFieldResults": {
"TextField": {
"dataType": "root",
"results": [
{
"resultType": "TEXT",
"content": "st nd"
}
]
},
"ImageField": {
"dataType": "root",
"results": [
{
"resultType": "IMAGE",
"base64": "abcdefghijk"
}
]
},
"TableField": {
"dataType": "root",
"results": [
{
"resultType": "TABLE",
"rows": [
{
"resultType": "TABLE_ROW",
"cells": [
{
"resultType": "TABLE_CELL",
"content": "Key"
},
{
"resultType": "TABLE_CELL",
"content": "Key",
"rowspan": 2,
"colspan": 2
}
]
}
]
}
]
},
"GroupParentField": {
"dataType": "root",
"results": [
{
"resultType": "GROUP",
"entries": {
"GroupNestedField": {
"resultType": "GROUP_ENTRY",
"dataType": "dataType",
"results": [
{
"resultType": "TEXT",
"content": "Group Text"
}
]
}
}
}
]
}
}
}

RecognitionResult with meta json example

Click to expand json
{
"resultSchemaVersion": "1.0.0",
"dataFieldResults": {
"TextField": {
"dataType": "root",
"results": [
{
"resultType": "TEXT",
"pageLocationMeta": {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
},
"fontMeta": {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
},
"content": "st nd"
}
]
},
"ImageField": {
"dataType": "root",
"results": [
{
"resultType": "IMAGE",
"pageLocationMeta": {
"x": 160.8,
"y": 400.31,
"width": 20.1,
"height": 7.42,
"page": 2
},
"base64": "abcdefghijk"
}
]
},
"TableField": {
"dataType": "root",
"results": [
{
"resultType": "TABLE",
"pageLocationMetas": [
{
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
}
],
"rows": [
{
"resultType": "TABLE_ROW",
"pageLocationMeta": {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
},
"cells": [
{
"resultType": "TABLE_CELL",
"pageLocationMeta": {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
},
"fontMeta": {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
},
"content": "Key"
},
{
"resultType": "TABLE_CELL",
"pageLocationMeta": {
"x": 176.8,
"y": 350.9,
"width": 34.1,
"height": 6.42,
"page": 2
},
"fontMeta": {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
},
"content": "Key",
"colspan": 2,
"rowspan": 2
}
]
}
]
}
]
},
"GroupParentField": {
"dataType": "root",
"results": [
{
"resultType": "GROUP",
"entries": {
"GroupNestedField": {
"resultType": "GROUP_ENTRY",
"dataType": "dataType",
"results": [
{
"resultType": "TEXT",
"pageLocationMeta": {
"x": 176.8,
"y": 543.52,
"width": 34.1,
"height": 6.42,
"page": 2
},
"fontMeta": {
"fontName": "TimesNewRomanPSMT",
"fontStyle": "NORMAL",
"fontColor": "000000"
},
"content": "Group Text"
}
]
}
}
}
]
}
}
}

JSON schema

Click to expand json schema
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$defs": {
"abstractResult": {
"type": "object",
"properties": {
"resultType": {
"enum": [
"TEXT",
"IMAGE",
"TABLE",
"TABLE_ROW",
"TABLE_CELL",
"GROUP",
"GROUP_ENTRY"
]
}
},
"required": [
"resultType"
],
"allOf": [
{
"if": {
"properties": {
"resultType": {
"const": "TEXT"
}
}
},
"then": {
"$ref": "#/$defs/textResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "IMAGE"
}
}
},
"then": {
"$ref": "#/$defs/imageResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "TABLE"
}
}
},
"then": {
"$ref": "#/$defs/tableResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "TABLE_ROW"
}
}
},
"then": {
"$ref": "#/$defs/tableRowResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "TABLE_CELL"
}
}
},
"then": {
"$ref": "#/$defs/tableCellResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "GROUP"
}
}
},
"then": {
"$ref": "#/$defs/groupResult"
}
},
{
"if": {
"properties": {
"resultType": {
"const": "GROUP_ENTRY"
}
}
},
"then": {
"$ref": "#/$defs/groupEntryResult"
}
}
]
},
"textResult": {
"type": "object",
"properties": {
"resultType": {
"const": "TEXT"
},
"pageLocationMeta": {
"$ref": "#/$defs/pageLocationMeta"
},
"fontMeta": {
"$ref": "#/$defs/fontMeta"
},
"content": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"resultType",
"content"
]
},
"imageResult": {
"type": "object",
"properties": {
"resultType": {
"const": "IMAGE"
},
"pageLocationMeta": {
"$ref": "#/$defs/pageLocationMeta"
},
"base64": {
"type": "string",
"pattern": "^[A-Za-z0-9+/]+={0,2}$"
}
},
"additionalProperties": false,
"required": [
"resultType",
"base64"
]
},
"tableResult": {
"type": "object",
"properties": {
"resultType": {
"const": "TABLE"
},
"pageLocationMetas": {
"type": "array",
"items": {
"$ref": "#/$defs/pageLocationMeta"
}
},
"rows": {
"type": "array",
"items": {
"$ref": "#/$defs/tableRowResult"
}
}
},
"additionalProperties": false,
"required": [
"resultType"
]
},
"tableRowResult": {
"type": "object",
"properties": {
"resultType": {
"const": "TABLE_ROW"
},
"pageLocationMeta": {
"$ref": "#/$defs/pageLocationMeta"
},
"cells": {
"type": "array",
"items": {
"$ref": "#/$defs/tableCellResult"
}
}
},
"additionalProperties": false,
"required": [
"resultType"
]
},
"tableCellResult": {
"type": "object",
"properties": {
"resultType": {
"const": "TABLE_CELL"
},
"pageLocationMeta": {
"$ref": "#/$defs/pageLocationMeta"
},
"fontMeta": {
"$ref": "#/$defs/fontMeta"
},
"colspan": {
"type": "integer",
"exclusiveMinimum": 1
},
"rowspan": {
"type": "integer",
"exclusiveMinimum": 1
},
"content": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"resultType",
"content"
]
},
"groupResult": {
"type": "object",
"properties": {
"resultType": {
"const": "GROUP"
},
"entries": {
"type": "object",
"$comment": "Using additional properties as any property name is group entry name.",
"additionalProperties": {
"$ref": "#/$defs/groupEntryResult"
}
}
},
"additionalProperties": false,
"required": [
"resultType"
]
},
"groupEntryResult": {
"type": "object",
"properties": {
"resultType": {
"const": "GROUP_ENTRY"
},
"dataType": {
"type": "string"
},
"results": {
"type": "array",
"items": {
"$ref": "#/$defs/abstractResult"
}
}
},
"additionalProperties": false,
"required": [
"resultType"
]
},
"pageLocationMeta": {
"type": "object",
"properties": {
"x": {
"type": "number"
},
"y": {
"type": "number"
},
"width": {
"type": "number"
},
"height": {
"type": "number"
},
"page": {
"type": "integer"
}
},
"additionalProperties": false
},
"fontMeta": {
"type": "object",
"properties": {
"fontName": {
"type": "string"
},
"fontStyle": {
"enum": ["BOLD", "ITALIC", "BOLD_ITALIC", "NORMAL"]
},
"fontColor": {
"type": "string",
"pattern": "^#?[0-9a-fA-F]{6}$"
}
},
"additionalProperties": false
}
},
"type": "object",
"properties": {
"resultSchemaVersion": {
"type": "string",
"pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"
},
"dataFieldResults": {
"type": "object",
"$comment": "Using additional properties as any property name is datafield name.",
"additionalProperties": {
"type": "object",
"properties": {
"dataType": {
"type": "string"
},
"results": {
"type": "array",
"items": {
"$ref": "#/$defs/abstractResult"
}
}
},
"additionalProperties": false
}
}
},
"additionalProperties": false,
"required": [
"resultSchemaVersion"
]
}