"use strict";
// TypeScript interop helper: exposes `source[key]` on `target` under `alias`
// (or `key` when no alias is given). An existing property descriptor is reused
// only when it is an accessor on an ES module or a data property that is
// neither writable nor configurable; otherwise an enumerable getter that reads
// through to the source is installed.
var __createBinding = (this && this.__createBinding) || (Object.create ? function (target, source, key, alias) {
    var exportName = alias === undefined ? key : alias;
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var reusable = false;
    if (descriptor) {
        reusable = "get" in descriptor
            ? Boolean(source.__esModule)
            : !(descriptor.writable || descriptor.configurable);
    }
    if (!reusable) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, exportName, descriptor);
} : function (target, source, key, alias) {
    // Fallback for environments without Object.create: plain value copy.
    target[alias === undefined ? key : alias] = source[key];
});
// TypeScript interop helper: attaches `value` to `target` as its "default"
// export (an enumerable property defined via defineProperty when Object.create
// is available, a plain assignment otherwise).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { value: value, enumerable: true });
    }
    : function (target, value) {
        target["default"] = value;
    });
// TypeScript interop helper for `import * as ns`: ES modules are returned
// untouched; anything else is wrapped in a namespace object that re-exports
// every own key except "default" and exposes the module itself as `default`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        var hasOwn = Object.prototype.hasOwnProperty;
        for (var name in mod) {
            if (name === "default" || !hasOwn.call(mod, name)) {
                continue;
            }
            __createBinding(namespace, mod, name);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
// TypeScript async/await helper: drives the generator produced by calling
// `generator` with `thisArg`/`_arguments`, resolving each yielded value as a
// promise (of type `P`, defaulting to the global Promise) and feeding the
// outcome back in. The returned promise settles with the generator's return
// value or with the first error it does not handle.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseCtor = P || Promise;
    const toPromise = (value) => (value instanceof PromiseCtor
        ? value
        : new PromiseCtor((resolve) => { resolve(value); }));
    return new PromiseCtor((resolve, reject) => {
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                advance(iterator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                advance(iterator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        const iterator = generator.apply(thisArg, _arguments || []);
        advance(iterator.next());
    });
};
// TypeScript object-rest helper: returns a shallow copy of `s` containing its
// own enumerable string keys and own enumerable symbol keys, minus every key
// listed in `e`. A null/undefined `s` yields an empty object.
var __rest = (this && this.__rest) || function (s, e) {
    var remainder = {};
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var key in s) {
        if (!hasOwn.call(s, key) || e.indexOf(key) >= 0) {
            continue;
        }
        remainder[key] = s[key];
    }
    if (s == null || typeof Object.getOwnPropertySymbols !== "function") {
        return remainder;
    }
    var isEnumerable = Object.prototype.propertyIsEnumerable;
    Object.getOwnPropertySymbols(s).forEach(function (sym) {
        if (e.indexOf(sym) < 0 && isEnumerable.call(s, sym)) {
            remainder[sym] = s[sym];
        }
    });
    return remainder;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.filterEligibleCommissionRows = exports.filterRowsByCutoffText = exports.splitTableBodyRows = exports.getRows = exports.getRowConfig = void 0;
const graphql_types_1 = require("@comulate/graphql-types");
const utils_1 = require("./utils");
const utils_2 = require("../../common/utils");
const constants_1 = require("./constants");
const blockUtils_1 = require("../blockUtils");
const _ = __importStar(require("lodash"));
const mathUtils_1 = require("../mathUtils");
/**
 * Builds the row configuration (flags plus page metrics) that determines how
 * row/block positions will be set.
 *
 * Table body rows and `yScale` are computed first via `getTableBodyRows`,
 * using the normalization flags (and `yScaleOverride`) found on
 * `defaultFlags`. Every returned config carries `enableDuplicateRowMatches`
 * (falls back to true when not set), `enablePageBasisRowOffsetNormalization`,
 * `pageBounds`, `maxTableWidth` and `yScale`.
 *
 * Without a template the remaining flags are copied from `defaultFlags`
 * unchanged. With a template, each flag is resolved through `coalesceFlag`
 * from the explicit flag and a detected value (a negative detection is passed
 * as `undefined`):
 *    - enableNonTableBodyRows: a template block lies outside every table body
 *      row, tables overlap along the y-plane, or the template spans multiple
 *      lines.
 *    - enableTableBodyRowSplitting: the explicit flag is set, template fields
 *      or non-text fields contain extra content, tables are skewed, tables
 *      overlap, or non-table-body rows were detected as needed.
 *    - lineWrapEnabledFields: the explicit list, else (when splitting and the
 *      template does not span multiple lines) the default line-wrap fields
 *      minus the fields that contain extra content.
 *    - disableRowOffsetNormalization: `enableRowOffsetNormalization` returned
 *      false or tables are skewed.
 *    - disableRotationOffsetNormalization: tables are skewed.
 *
 * Tables only count as skewed when the document is not suspected to be
 * rotated and the max table skew exceeds half the default offset threshold.
 *
 * @param template Field name -> labeled block ids; when falsy, only the
 *   supplied flags are used.
 * @param blocks Textract blocks of the document.
 * @param documentIdentifierHeaders Header texts forwarded to the table helpers.
 * @param defaultFlags Optional explicit flag overrides.
 * @returns Promise resolving to the row config object.
 */
const getRowConfig = (template, blocks, documentIdentifierHeaders, defaultFlags) => __awaiter(void 0, void 0, void 0, function* () {
    // Reading from an empty object yields undefined for every flag, which
    // mirrors the behaviour of a missing `defaultFlags`.
    const flags = defaultFlags == null ? {} : defaultFlags;
    const coalesce = (flag, fallback) => (0, utils_1.coalesceFlag)(flag, fallback);
    const { rows, yScale } = getTableBodyRows(blocks, documentIdentifierHeaders, Boolean(flags.disableRowOffsetNormalization), Boolean(flags.disableRotationOffsetNormalization), Boolean(flags.disablePageScaleNormalization), Boolean(flags.enablePageBasisRowOffsetNormalization), flags.yScaleOverride);
    yield (0, utils_2.yieldEventLoop)();
    const baseConfig = {
        // Default to true if flag not explicitly set
        enableDuplicateRowMatches: coalesce(flags.enableDuplicateRowMatches, true),
        enablePageBasisRowOffsetNormalization: flags.enablePageBasisRowOffsetNormalization,
        pageBounds: getPageTableBounds(blocks),
        maxTableWidth: getMaxTableWidth(blocks, documentIdentifierHeaders),
        yScale,
    };
    yield (0, utils_2.yieldEventLoop)();
    if (!template) {
        // Nothing to detect from: pass the explicit flags straight through.
        return Object.assign({}, baseConfig, {
            enableNonTableBodyRows: flags.enableNonTableBodyRows,
            enableTableBodyRowSplitting: flags.enableTableBodyRowSplitting,
            lineWrapEnabledFields: flags.lineWrapEnabledFields,
            disableRowOffsetNormalization: flags.disableRowOffsetNormalization,
            disablePageScaleNormalization: flags.disablePageScaleNormalization,
            disableRotationOffsetNormalization: flags.disableRotationOffsetNormalization,
        });
    }
    const templateBlockIds = Object.values(template).flat();
    const tableBodyBlockIds = new Set(rows.flat().map((cell) => cell.id));
    const allTemplateBlocksWithinTable = templateBlockIds.every((blockId) => tableBodyBlockIds.has(blockId));
    const containsOverlappingTables = tablesOverlap(blocks, yScale);
    const containsSectionHeaders = templateSpansMultipleLines(template, blocks);
    const enableNonTableBodyRows = !allTemplateBlocksWithinTable ||
        containsOverlappingTables ||
        containsSectionHeaders;
    const fieldsWithExtraContent = getTemplateFieldsWithExtraContent(template, rows);
    const containsExtraRowContent = fieldsWithExtraContent.length > 0 ||
        tableRowsContainExtraContent(template, rows, yScale || constants_1.DEFAULT_OFFSET_ERROR_THRESHOLD);
    // A large vertical spread on what is otherwise a single line suggests rotation.
    const documentRotated = !containsSectionHeaders &&
        templateBlockRangeExceedsThreshold(template, blocks);
    const maxTableSkew = getMaxTableSkew(blocks);
    const containsSkewedTables = !documentRotated &&
        maxTableSkew > constants_1.DEFAULT_OFFSET_ERROR_THRESHOLD / 2;
    // Split rows by default unless manually overridden
    const enableTableBodyRowSplitting = flags.enableTableBodyRowSplitting ||
        containsExtraRowContent ||
        containsSkewedTables ||
        containsOverlappingTables ||
        enableNonTableBodyRows;
    let detectedLineWrapFields = [];
    if (enableTableBodyRowSplitting && !containsSectionHeaders) {
        detectedLineWrapFields = constants_1.DEFAULT_LINE_WRAP_ENABLED_FIELDS.filter((field) => !fieldsWithExtraContent.includes(field));
    }
    const lineWrapEnabledFields = flags.lineWrapEnabledFields || detectedLineWrapFields;
    const disableRowOffsetNormalization = !enableRowOffsetNormalization(blocks, documentIdentifierHeaders) ||
        containsSkewedTables;
    return Object.assign({}, baseConfig, {
        enableNonTableBodyRows: coalesce(flags.enableNonTableBodyRows, enableNonTableBodyRows || undefined),
        enableTableBodyRowSplitting: coalesce(flags.enableTableBodyRowSplitting, enableTableBodyRowSplitting || undefined),
        lineWrapEnabledFields: coalesce(flags.lineWrapEnabledFields, lineWrapEnabledFields.length ? lineWrapEnabledFields : undefined),
        disableRowOffsetNormalization: coalesce(flags.disableRowOffsetNormalization, disableRowOffsetNormalization || undefined),
        disableRotationOffsetNormalization: coalesce(flags.disableRotationOffsetNormalization, containsSkewedTables || undefined),
        disablePageScaleNormalization: flags.disablePageScaleNormalization,
    });
});
exports.getRowConfig = getRowConfig;
/**
 * Checks whether the template row spans multiple *Comulate* lines - *not*
 * whether it spans multiple Textract rows (a template row may sit inside a
 * single Textract row and still span several lines).
 *
 * Each non-empty template field contributes one point per series: the x
 * centre of its first block paired with the field's top, middle and bottom
 * y-value. The template is treated as multi-line when even the best of the
 * three series has a mean absolute error above 0.001.
 *
 * @param template Field name -> labeled block ids.
 * @param blocks Textract blocks, looked up by `Id`.
 * @returns true when the smallest of the three errors exceeds 0.001.
 */
const templateSpansMultipleLines = (template, blocks) => {
    const blocksById = _.keyBy(blocks, "Id");
    // Null-safe bounding-box read: unknown ids or missing geometry give undefined.
    const readBox = (blockId, key) => {
        const block = blocksById[blockId];
        const geometry = block == null ? undefined : block.Geometry;
        const box = geometry == null ? undefined : geometry.BoundingBox;
        return box == null ? undefined : box[key];
    };
    const topSeries = [];
    const middleSeries = [];
    const bottomSeries = [];
    for (const blockIds of Object.values(template)) {
        if (!blockIds.length) {
            continue;
        }
        const yMin = Math.min(...blockIds.map((blockId) => readBox(blockId, "Top")));
        const yMax = Math.max(...blockIds.map((blockId) => readBox(blockId, "Top") + readBox(blockId, "Height")));
        const firstBlockId = blockIds[0];
        const xCenter = readBox(firstBlockId, "Left") + readBox(firstBlockId, "Width") / 2;
        topSeries.push([xCenter, yMin]);
        middleSeries.push([xCenter, (yMin + yMax) / 2]);
        bottomSeries.push([xCenter, yMax]);
    }
    const errors = [topSeries, middleSeries, bottomSeries].map((series) => (0, mathUtils_1.meanAbsoluteError)(series).mae);
    return Math.min(...errors) > 0.001;
};
/**
 * Lists the template fields whose labeled blocks have other content
 * interleaved with them along the x-plane within the row(s) that contain the
 * template. Every field is checked, which targets consistent extra content
 * on each row rather than random text overflow.
 *
 * Each cell of the containing rows contributes three anchors (left edge,
 * centre, right edge). After sorting all anchors by x, a field is
 * "uninterrupted" when the anchors of its blocks occupy consecutive indices.
 *
 * @param template Field name -> labeled block ids.
 * @param rows Bounded rows used to locate the rows containing the template.
 * @returns Names of the fields whose anchor indices are interrupted.
 */
const getTemplateFieldsWithExtraContent = (template, rows) => {
    const templateEntries = Object.entries(template);
    const labeledBlockIds = Object.values(template).flat();
    const neighbouringCells = (0, utils_1.getContainingRows)(rows, labeledBlockIds).flat();
    const anchors = [];
    for (const cell of neighbouringCells) {
        anchors.push([cell.id, cell.boundingBox.x], [cell.id, cell.boundingBox.x + cell.boundingBox.width / 2], [cell.id, cell.boundingBox.x + cell.boundingBox.width]);
    }
    anchors.sort((left, right) => left[1] - right[1]);
    const idsByPosition = anchors.map(([blockId]) => blockId);
    return templateEntries
        .filter(([, blockIds]) => {
        // Relative indices of the min/mid/max x-positions of every block in the field
        const indices = [];
        for (const blockId of blockIds) {
            const first = _.indexOf(idsByPosition, blockId);
            const second = _.indexOf(idsByPosition, blockId, first + 1);
            const third = _.indexOf(idsByPosition, blockId, second + 1);
            indices.push(first, second, third);
        }
        // A span at least as wide as the index count means something else sits in between
        return Math.max(...indices) - Math.min(...indices) >= indices.length;
    })
        .map(([header]) => header);
};
/**
 * Checks whether the vertical spread (max Top minus min Top) of the first
 * block of every template field exceeds DEFAULT_OFFSET_ERROR_THRESHOLD. When
 * this holds even though the blocks are judged to be on the same line (as
 * determined by MAE along the central axis), the document is suspected to be
 * rotated.
 *
 * @param template Field name -> labeled block ids; only the first id of each
 *   field is considered.
 * @param blocks Textract blocks, looked up by `Id`.
 * @returns true when the spread is larger than the threshold.
 */
const templateBlockRangeExceedsThreshold = (template, blocks) => {
    const leadingBlockIds = Object.values(template)
        .map((group) => group[0])
        .filter(utils_2.isNotNullAndNotUndefined);
    const blocksById = _.keyBy(blocks, "Id");
    const tops = leadingBlockIds.map((blockId) => {
        const geometry = blocksById[blockId].Geometry;
        const box = geometry == null ? undefined : geometry.BoundingBox;
        return box == null ? undefined : box.Top;
    });
    const spread = Math.max(...tops) - Math.min(...tops);
    return spread > constants_1.DEFAULT_OFFSET_ERROR_THRESHOLD;
};
/**
 * Returns the largest skew found across all TABLE blocks. For each table the
 * skew is the larger of |Y of polygon point 1 - Y of point 0| and
 * |X of polygon point 3 - X of point 0|.
 *
 * Yields -Infinity when there are no TABLE blocks and NaN when a table has
 * no polygon.
 *
 * @param blocks Textract blocks.
 * @returns Maximum skew over all tables.
 */
const getMaxTableSkew = (blocks) => {
    const skews = [];
    for (const block of blocks) {
        if (block.BlockType !== "TABLE") {
            continue;
        }
        const geometry = block.Geometry;
        const polygon = geometry == null ? undefined : geometry.Polygon;
        let verticalSkew = NaN;
        let horizontalSkew = NaN;
        if (polygon != null) {
            const yStart = polygon[0].Y;
            const yEnd = polygon[1].Y;
            const xStart = polygon[0].X;
            const xEnd = polygon[3].X;
            verticalSkew = Math.abs(yEnd - yStart);
            horizontalSkew = Math.abs(xEnd - xStart);
        }
        skews.push(Math.max(verticalSkew, horizontalSkew));
    }
    return Math.max(...skews);
};
/**
 * Decides whether block positions should be set relative to their containing
 * row rather than to the page itself.
 *
 * When more than one document-identifier header table is found, the decision
 * is based on how far the headers shift between tables: for every identifier
 * header the spread of its `Left` position across tables is computed, and
 * normalization is enabled when the median spread exceeds half of
 * DEFAULT_OFFSET_ERROR_THRESHOLD. Otherwise the median confidence of the
 * primary table is used as a proxy for document quality - below 0.7 the PDF
 * is more likely scanned than downloaded, meaning pages could shift.
 *
 * @param blocks Textract blocks.
 * @param documentIdentifierHeaders Header texts identifying the document's tables.
 * @returns true when row offset normalization should be enabled.
 */
const enableRowOffsetNormalization = (blocks, documentIdentifierHeaders) => {
    const blocksById = _.keyBy(blocks, "Id");
    const isColumnHeaderCell = ({ BlockType, EntityTypes }) => BlockType === "CELL" && EntityTypes != null && EntityTypes.includes("COLUMN_HEADER");
    // One list of header word blocks per identifier table
    const headerWordsPerTable = getDocumentIdentifierHeaderTables(blocks, blocksById, documentIdentifierHeaders).map((tableBlock) => {
        const cellIds = (0, blockUtils_1.getChildRelationshipIds)(tableBlock);
        if (cellIds === undefined || !tableBlock.Id) {
            return [];
        }
        const headerCells = cellIds
            .map((id) => blocksById[id])
            .filter(isColumnHeaderCell);
        return headerCells
            .map((cell) => {
            const wordIds = (0, blockUtils_1.getChildRelationshipIds)(cell);
            return wordIds == null
                ? undefined
                : wordIds.map((wordId) => blocksById[wordId]);
        })
            .filter(utils_2.isNotNullAndNotUndefined)
            .flat();
    });
    // Only use header shifts if more than one set of headers to compare against
    if (headerWordsPerTable.length <= 1) {
        return ((0, blockUtils_1.getMedianConfidenceOfPrimaryTable)(blocks, documentIdentifierHeaders) < 0.7);
    }
    const shiftPerHeader = documentIdentifierHeaders.map((header) => {
        const lefts = headerWordsPerTable
            .map((words) => {
            const match = words.find((word) => word.Text === header);
            const geometry = match == null ? undefined : match.Geometry;
            const box = geometry == null ? undefined : geometry.BoundingBox;
            return box == null ? undefined : box.Left;
        })
            .filter(utils_2.isNotNullAndNotUndefined);
        return Math.max(...lefts) - Math.min(...lefts);
    });
    return (0, mathUtils_1.median)(shiftPerHeader) > constants_1.DEFAULT_OFFSET_ERROR_THRESHOLD / 2;
};
/**
 * Checks whether any table body rows contain miscellaneous extra content in
 * non-text fields spanning multiple lines.
 *
 * Rows are reduced to the blocks that horizontally overlap the bounds of a
 * non-STRING field (within a tolerance of `yScale`) and whose text is valid
 * for that field's type. If splitting those reduced rows into lines produces
 * more rows than before, at least one row holds extra content. Text fields
 * are skipped to avoid splitting for cases like line wrapping (which we
 * prefer Textract rows for).
 *
 * @param template Field name -> labeled block ids.
 * @param rows Bounded table body rows.
 * @param yScale Line-grouping scale; also used as the x-overlap tolerance.
 * @returns true when splitting yields more rows than were passed in.
 */
const tableRowsContainExtraContent = (template, rows, yScale) => {
    const nonTextFieldBounds = (0, utils_1.getFieldBounds)(template, rows).filter(({ field }) => (0, utils_1.getFieldType)(field) !== graphql_types_1.AdemFieldType.STRING);
    const matchesAnyField = (block) => {
        const blockLeft = block.boundingBox.x;
        const blockRight = block.boundingBox.x + block.boundingBox.width;
        return nonTextFieldBounds.some((bounds) => {
            const lower = bounds.xMin - yScale;
            const upper = bounds.xMax + yScale;
            const startsInside = blockLeft > lower && blockLeft < upper;
            const endsInside = blockRight > lower && blockRight < upper;
            const spansField = blockLeft < lower && blockRight > upper;
            if (!(startsInside || endsInside || spansField)) {
                return false;
            }
            return (0, utils_1.isValidMatchedBlockText)((0, utils_1.getFieldType)(bounds.field), block.text);
        });
    };
    const candidateRows = rows
        .map((row) => row.filter(matchesAnyField))
        .filter((row) => row.length);
    const lineCount = (0, exports.splitTableBodyRows)(candidateRows, yScale).length;
    return lineCount > candidateRows.length;
};
/**
 * Detects whether any two tables overlap along the y-plane.
 *
 * Every TABLE block is mapped to a [start, end] span (its top and bottom,
 * each offset by the page number). After ordering the spans by start, an
 * overlap is reported when a span ends at or past the start of the next one
 * plus a tolerance (`yScale`, or DEFAULT_OFFSET_ERROR_THRESHOLD when `yScale`
 * is falsy).
 *
 * @param blocks Textract blocks.
 * @param yScale Overlap tolerance.
 * @returns true if at least one pair of neighbouring tables overlaps.
 */
const tablesOverlap = (blocks, yScale) => {
    const spans = blocks
        .filter((block) => block.BlockType === "TABLE")
        .map((block) => {
        const geometry = block.Geometry;
        const box = geometry == null ? undefined : geometry.BoundingBox;
        const top = box == null ? undefined : box.Top;
        const height = box == null ? undefined : box.Height;
        return {
            start: top + block.Page,
            end: top + height + block.Page,
        };
    })
        .sort((first, second) => first.start - second.start);
    return spans.some((span, index) => index + 1 < spans.length &&
        span.end >= spans[index + 1].start + (yScale || constants_1.DEFAULT_OFFSET_ERROR_THRESHOLD));
};
/**
 * Builds the rows used for matching, plus the rows left out of matching.
 *
 * Steps, in order:
 *    1. Table body rows are computed from the Textract blocks and stored on
 *       `debug.textractRows`.
 *    2. If `rowConfig.enableNonTableBodyRows` is set, rows built from blocks
 *       outside the table bodies are added and all rows are sorted by the y
 *       of their first block; otherwise eligible header rows are appended.
 *    3. If `rowConfig.enableTableBodyRowSplitting` is set, rows are regrouped
 *       into individual lines.
 *    4. If a row start/end cutoff text flag is set, rows are trimmed by it.
 *    5. If `regexFilter` is given, only rows whose space-joined text matches
 *       are kept and `rowIndex` of the remaining blocks is renumbered.
 *    6. Rows not included for matching are collected as `excludedRows` for
 *       downstream checks.
 *
 * The event loop is yielded between the steps.
 *
 * @param template Field name -> labeled block ids (optional).
 * @param rowSignatureConfig Optional config whose `rowSignatureGroups` supply
 *   row signatures for header-row eligibility.
 * @param blocks Textract blocks.
 * @param documentIdentifierHeaders Header texts forwarded to the table helpers.
 * @param debug Object that receives `textractRows`.
 * @param rowConfig Row config, e.g. as produced by `getRowConfig`.
 * @param baseRequiredFields Fields forwarded to header-row eligibility.
 * @param flags Matching flags (cutoff texts etc.).
 * @param regexFilter Optional regex a row's text must match to be kept.
 * @returns Promise of `{ rows, yScale, rowConfig, excludedRows }`.
 */
const getRows = (template, rowSignatureConfig, blocks, documentIdentifierHeaders, debug, rowConfig, baseRequiredFields = [], flags = {}, regexFilter) => __awaiter(void 0, void 0, void 0, function* () {
    const { yScale } = rowConfig;
    let { rows } = getTableBodyRows(blocks, documentIdentifierHeaders, !!rowConfig.disableRowOffsetNormalization, !!rowConfig.disableRotationOffsetNormalization, !!rowConfig.disablePageScaleNormalization, !!rowConfig.enablePageBasisRowOffsetNormalization);
    debug.textractRows = rows;
    yield (0, utils_2.yieldEventLoop)();
    if (rowConfig.enableNonTableBodyRows) {
        rows = getNonTableBodyRows(blocks, rows, rowConfig, yScale)
            .concat(rows)
            .sort((a, b) => { var _a, _b; return ((_a = a[0]) === null || _a === void 0 ? void 0 : _a.boundingBox.y) - ((_b = b[0]) === null || _b === void 0 ? void 0 : _b.boundingBox.y); });
    }
    else {
        rows = appendEligibleHeaderRows(blocks, rows, template, (rowSignatureConfig === null || rowSignatureConfig === void 0 ? void 0 : rowSignatureConfig.rowSignatureGroups.map((group) => group.rowSignatures).flat()) || [], rowConfig, baseRequiredFields, flags);
    }
    yield (0, utils_2.yieldEventLoop)();
    if (rowConfig.enableTableBodyRowSplitting) {
        rows = (0, exports.splitTableBodyRows)(rows, yScale);
    }
    yield (0, utils_2.yieldEventLoop)();
    if (flags.rowMatchStartCutoffText || flags.rowMatchEndCutoffText) {
        rows = (0, exports.filterRowsByCutoffText)(rows, flags);
    }
    // Filter rows by custom regex if specified
    if (regexFilter) {
        rows = rows.filter((row) => {
            const text = row.map((block) => block.text).join(" ");
            // A global or sticky regex remembers `lastIndex` between `test`
            // calls; without a reset, a match on one row makes the search on
            // the next row start mid-string and drop rows that should match.
            if (regexFilter.global || regexFilter.sticky) {
                regexFilter.lastIndex = 0;
            }
            return regexFilter.test(text);
        });
        // Update row indices of filtered rows
        rows.forEach((row, rowInd) => row.forEach((block) => (block.rowIndex = rowInd)));
    }
    yield (0, utils_2.yieldEventLoop)();
    // Find any rows that are not included for matching for downstream checks
    // to ensure we aren't erroneously missing any content
    const excludedRows = getNonTableBodyRows(blocks, rows, rowConfig, yScale);
    yield (0, utils_2.yieldEventLoop)();
    return { rows, yScale, rowConfig, excludedRows };
});
exports.getRows = getRows;
/**
 * Regroups the blocks of all given rows into lines based on their vertical
 * centre (`y + height / 2`), using `groupPositionsByLine` with `yScale`.
 *
 * @param rows Rows of bounded blocks.
 * @param yScale Scale passed to the line grouping.
 * @returns New rows, one per line, each sorted by x and made of shallow
 *   copies of the blocks with `rowIndex` set to the line's index.
 */
const splitTableBodyRows = (rows, yScale) => {
    const allBlocks = rows.flat();
    const blockLookup = _.keyBy(allBlocks, (block) => block.id);
    const centerPositions = allBlocks.map((block) => [
        block.id,
        block.boundingBox.y + block.boundingBox.height / 2,
    ]);
    const byLeftEdge = (first, second) => first.boundingBox.x - second.boundingBox.x;
    const lines = (0, utils_1.groupPositionsByLine)(centerPositions, yScale);
    return lines
        .map(([, blockIds]) => blockIds.map((blockId) => blockLookup[blockId]).sort(byLeftEdge))
        .map((line, rowIndex) => line.map((block) => Object.assign({}, block, { rowIndex })));
};
exports.splitTableBodyRows = splitTableBodyRows;
/**
 * Converts the Textract table body rows into rows of bounded word blocks and
 * derives the x/y scales of the document.
 *
 * Row indices are shifted so the smallest first-cell `RowIndex` becomes 0
 * (header rows otherwise take up row indices).
 *
 * @param blocks Textract blocks.
 * @param documentIdentifierHeaders Header texts used for the max table width.
 * @param disableRowOffsetNormalization Forwarded to the block conversion.
 * @param disableRotationOffsetNormalization Forwarded to the block conversion.
 * @param disablePageScaleNormalization Forwarded to the block conversion; when
 *   truthy the computed `yScale` is also multiplied by the max table width.
 * @param enablePageBasisRowOffsetNormalization Forwarded to the block conversion.
 * @param yScaleOverride When truthy, used as `yScale` instead of computing it.
 * @returns `{ rows, xScale, yScale }` where `xScale` is the widest TABLE
 *   block and `yScale` is the override or the mean of the per-row mean block
 *   heights, floored at MIN_OFFSET_ERROR_THRESHOLD.
 */
const getTableBodyRows = (blocks, documentIdentifierHeaders, disableRowOffsetNormalization, disableRotationOffsetNormalization, disablePageScaleNormalization, enablePageBasisRowOffsetNormalization, yScaleOverride) => {
    const blocksById = _.keyBy(blocks, "Id");
    const tableBlocks = blocks.filter((block) => block.BlockType === "TABLE");
    const pageBounds = getPageTableBounds(blocks);
    const maxTableWidth = getMaxTableWidth(blocks, documentIdentifierHeaders);
    const textractTableBodyRows = getTextractTableBodyRows(blocks);
    const firstRowIndices = textractTableBodyRows.map((row) => {
        const firstCell = row[0];
        return (firstCell == null ? undefined : firstCell.RowIndex) || 0;
    });
    const minRowInd = Math.min(...firstRowIndices);
    // A fresh options object is created for every converted block.
    const buildOptions = () => ({
        disableRowOffsetNormalization,
        disableRotationOffsetNormalization,
        enablePageBasisRowOffsetNormalization,
        disablePageScaleNormalization,
        pageBounds,
        maxTableWidth,
    });
    const rows = textractTableBodyRows.map((row) => {
        const wordBlocks = row
            .map((cell) => {
            const wordIds = (0, blockUtils_1.getChildRelationshipIds)(cell);
            return wordIds == null
                ? undefined
                : wordIds.map((wordId) => blocksById[wordId]);
        })
            .filter(utils_2.isNotNullAndNotUndefined)
            .flat();
        // Subtract to account for header rows taking up row indices
        return wordBlocks.map((block) => textractBlockToBoundedBlock(getBoundingBoxFromContainingRow(row), block, row[0].RowIndex - minRowInd, buildOptions()));
    });
    const xScale = _.max(tableBlocks.map((block) => {
        const geometry = block.Geometry;
        const box = geometry == null ? undefined : geometry.BoundingBox;
        return box == null ? undefined : box.Width;
    }));
    let yScale = yScaleOverride;
    if (!yScale) {
        const meanRowHeights = rows
            .map((row) => _.mean(row.map((block) => block.boundingBox.height)))
            .filter((num) => !isNaN(num));
        const pageScale = disablePageScaleNormalization ? maxTableWidth : 1;
        yScale = Math.max(constants_1.MIN_OFFSET_ERROR_THRESHOLD, _.mean(meanRowHeights)) * pageScale;
    }
    return { rows, xScale, yScale };
};
/**
 * Builds rows from the WORD blocks that are not part of `tableBodyRows`.
 *
 * The outside words are grouped into lines by their vertical centre (offset
 * by page number), each line is sorted by `Left`, and every block is
 * converted to a bounded block whose row index is the line's index. When the
 * first word of a line belongs to a Textract table body row, that row is
 * used as the containing row for the bounding box; otherwise the line itself.
 *
 * @param blocks Textract blocks.
 * @param tableBodyRows Bounded rows whose block ids are excluded.
 * @param rowConfig Row config forwarded to the block conversion.
 * @param yScale Scale passed to the line grouping.
 * @returns Rows of bounded blocks outside `tableBodyRows`.
 */
const getNonTableBodyRows = (blocks, tableBodyRows, rowConfig, yScale) => {
    const blocksById = _.keyBy(blocks, "Id");
    const tableBodyBlockIds = new Set(tableBodyRows.flat().map((block) => block.id));
    const textractTableBodyRows = getTextractTableBodyRows(blocks);
    // word block id -> index of the Textract table body row containing it
    const rowIndexEntries = textractTableBodyRows.flatMap((row, rowIndex) => {
        const wordIds = row
            .map(blockUtils_1.getChildRelationshipIds)
            .filter(utils_2.isNotNullAndNotUndefined)
            .flat();
        return wordIds.map((blockId) => [blockId, rowIndex]);
    });
    const blocksToTableRows = Object.fromEntries(rowIndexEntries);
    const readBox = (block, key) => {
        const geometry = block.Geometry;
        const box = geometry == null ? undefined : geometry.BoundingBox;
        return box == null ? undefined : box[key];
    };
    const outsideRowPositions = blocks
        .filter((block) => block.BlockType === "WORD" && !tableBodyBlockIds.has(block.Id))
        .map((block) => [
        block.Id,
        block.Page + (readBox(block, "Top") + readBox(block, "Height") / 2),
    ]);
    const byLeftEdge = (first, second) => readBox(first, "Left") - readBox(second, "Left");
    const lines = (0, utils_1.groupPositionsByLine)(outsideRowPositions, yScale);
    return lines
        .map(([, blockIds]) => blockIds.map((blockId) => blocksById[blockId]).sort(byLeftEdge))
        .filter((line) => line.length)
        .map((line, lineIndex) => {
        const containingRow = textractTableBodyRows[blocksToTableRows[line[0].Id]] || line;
        return line.map((block) => textractBlockToBoundedBlock(getBoundingBoxFromContainingRow(containingRow), block, lineIndex, rowConfig));
    });
};
/**
 * Collects the table body rows of every TABLE block: the table's child CELL
 * blocks that have no `EntityTypes`, grouped by `RowIndex`. Tables without
 * child relationships or without an `Id` contribute no rows.
 *
 * @param blocks Textract blocks.
 * @returns Flat list of rows (arrays of CELL blocks) across all tables.
 */
const getTextractTableBodyRows = (blocks) => {
    const blocksById = _.keyBy(blocks, "Id");
    const isBodyCell = ({ BlockType, EntityTypes }) => BlockType === "CELL" && EntityTypes === undefined;
    return blocks
        .filter((block) => block.BlockType === "TABLE")
        .flatMap((tableBlock) => {
        const cellIds = (0, blockUtils_1.getChildRelationshipIds)(tableBlock);
        if (cellIds === undefined || !tableBlock.Id) {
            return [];
        }
        const bodyCells = cellIds.map((id) => blocksById[id]).filter(isBodyCell);
        return Object.values(_.groupBy(bodyCells, "RowIndex"));
    });
};
/**
 * Adds eligible column-header rows to the table body rows.
 *
 * Header rows are built from each TABLE's child CELL blocks marked
 * COLUMN_HEADER, grouped by `RowIndex` and converted to bounded word blocks.
 * They are then filtered through `filterEligibleCommissionRows`, using the
 * template's field bounds when a template is given, otherwise the row
 * signatures. The combined rows are returned without empty rows, sorted by
 * the y of their first block.
 *
 * @param blocks Textract blocks.
 * @param tableBodyRows Bounded table body rows.
 * @param template Field name -> labeled block ids (optional).
 * @param rowSignatureGroups Row signatures used when no template is given.
 * @param rowConfig Row config forwarded to the block conversion.
 * @param baseRequiredFields Forwarded to the eligibility filter.
 * @param flags Forwarded to the field-bounds lookup and eligibility filter.
 * @returns Table body rows plus eligible header rows, sorted by y.
 */
const appendEligibleHeaderRows = (blocks, tableBodyRows, template, rowSignatureGroups, rowConfig, baseRequiredFields, flags) => {
    const blocksById = _.keyBy(blocks, "Id");
    const isColumnHeaderCell = ({ BlockType, EntityTypes }) => BlockType === "CELL" && EntityTypes != null && EntityTypes.includes("COLUMN_HEADER");
    const headerCellRows = blocks
        .filter((block) => block.BlockType === "TABLE")
        .flatMap((tableBlock) => {
        const cellIds = (0, blockUtils_1.getChildRelationshipIds)(tableBlock);
        if (cellIds === undefined || !tableBlock.Id) {
            return [];
        }
        const headerCells = cellIds
            .map((id) => blocksById[id])
            .filter(isColumnHeaderCell);
        return Object.values(_.groupBy(headerCells, (cell) => cell.RowIndex));
    });
    const allHeaderRows = headerCellRows.map((cellRow) => cellRow
        .map(blockUtils_1.getChildRelationshipIds)
        .filter(utils_2.isNotNullAndNotUndefined)
        .flat()
        .map((wordId) => textractBlockToBoundedBlock(getBoundingBoxFromContainingRow(cellRow), blocksById[wordId], cellRow[0].RowIndex, rowConfig)));
    const selectEligibleHeaderRows = () => {
        if (template) {
            const fieldBounds = (0, utils_1.getFieldBounds)(template, tableBodyRows, flags);
            return (0, exports.filterEligibleCommissionRows)(fieldBounds, null, allHeaderRows, baseRequiredFields, flags);
        }
        if (rowSignatureGroups) {
            return (0, exports.filterEligibleCommissionRows)(null, rowSignatureGroups, allHeaderRows, baseRequiredFields, flags);
        }
        return [];
    };
    const eligibleHeaderRows = selectEligibleHeaderRows();
    return [...tableBodyRows, ...eligibleHeaderRows]
        .filter((row) => row.length)
        .sort((first, second) => first[0].boundingBox.y - second[0].boundingBox.y);
};
/**
 * Trims rows by cutoff texts. This primarily handles cases where matches
 * should be terminated at a certain point in the document to avoid matches
 * with no otherwise clear differentiation from other matches in a row
 * signature group.
 *
 *    - rowMatchStartCutoffText: rows before the first matching row are
 *      dropped (the matching row itself is kept).
 *    - rowMatchEndCutoffText: the first matching row and everything after it
 *      are dropped.
 *    - pageStartCutoffText / pageEndCutoffText: the same two rules applied
 *      separately to the rows of each page (grouped by the `page` of each
 *      row's first block).
 *
 * A cutoff text that matches no row leaves the rows untouched.
 *
 * @param rows Rows of bounded blocks.
 * @param flags Object carrying the four optional cutoff texts.
 * @returns The remaining rows.
 */
const filterRowsByCutoffText = (rows, { rowMatchStartCutoffText, rowMatchEndCutoffText, pageStartCutoffText, pageEndCutoffText, }) => {
    const isPresent = (value) => (0, utils_2.isNotNullAndNotUndefined)(value);
    let remaining = rows;
    if (isPresent(rowMatchStartCutoffText)) {
        const startIndex = findRowWithTextOrRegex(remaining, rowMatchStartCutoffText);
        if (startIndex !== -1) {
            remaining = remaining.slice(startIndex);
        }
    }
    if (isPresent(rowMatchEndCutoffText)) {
        const stopIndex = findRowWithTextOrRegex(remaining, rowMatchEndCutoffText);
        if (stopIndex !== -1) {
            remaining = remaining.slice(0, stopIndex);
        }
    }
    if (!(isPresent(pageStartCutoffText) || isPresent(pageEndCutoffText))) {
        return remaining;
    }
    // Re-run the row-level cutoffs on every page with the page-level texts.
    const rowsByPage = _.groupBy(remaining, (row) => row[0].page);
    return Object.values(rowsByPage).flatMap((pageRows) => (0, exports.filterRowsByCutoffText)(pageRows, {
        rowMatchStartCutoffText: pageStartCutoffText,
        rowMatchEndCutoffText: pageEndCutoffText,
    }));
};
exports.filterRowsByCutoffText = filterRowsByCutoffText;
/**
 * Finds the first row whose text matches a cutoff given as plain text or as a
 * regular-expression source string.
 *
 * A row's text is the `text` of its blocks joined with single spaces. A row
 * matches when any of the following holds:
 *  - the row text contains `textOrRegex` as a substring;
 *  - the Levenshtein distance between the whole row text and `textOrRegex`
 *    is at most 3;
 *  - `textOrRegex`, compiled as a regular expression, matches the row text.
 *
 * @param rows Rows (arrays of blocks carrying a `text` property) to search.
 * @param textOrRegex Plain text or regular-expression source to look for.
 * @returns Index of the first matching row, or -1 when no row matches.
 */
const findRowWithTextOrRegex = (rows, textOrRegex) => {
    // Compile once instead of once per row. Plain-text cutoffs such as
    // "Total (USD" are not valid patterns; they must not abort the search, so
    // an invalid pattern simply disables the regex check.
    let pattern = null;
    try {
        pattern = new RegExp(textOrRegex);
    }
    catch (error) {
        if (!(error instanceof SyntaxError)) {
            throw error;
        }
    }
    return rows.findIndex((row) => {
        const rowText = row.map((block) => block.text).join(" ");
        return (rowText.includes(textOrRegex) ||
            (0, utils_2.levenshteinDistance)(rowText, textOrRegex) <= 3 ||
            (pattern !== null && pattern.test(rowText)));
    });
};
/**
 * Preprocessing filter that keeps only rows which line up with enough of the
 * expected fields. It is used to keep extraneous rows out of alignment
 * auto-detection.
 *
 * Field definitions come from `fieldBounds` when it is provided, otherwise
 * from the flattened `rowSignatureGroups`. A row is kept when either:
 *  - every entry of `baseRequiredFields` is among the row's matched fields, or
 *  - `containsEndOfRow` accepts the row AND it matched at least
 *    `max(3, min(#standard fields, #NUMBER fields + 1))` distinct fields.
 *
 * @param fieldBounds Template field bounds, or null to use `rowSignatureGroups`.
 * @param rowSignatureGroups Nested groups of field configs, or null.
 * @param rows Candidate rows (arrays of blocks).
 * @param baseRequiredFields Field names that by themselves make a row eligible.
 * @param flags Passed through unchanged to the field matchers.
 * @returns The subset of `rows` that is eligible, in the original order.
 * @throws Error when `rows` is non-empty and neither `fieldBounds` nor
 *   `rowSignatureGroups` is provided.
 */
const filterEligibleCommissionRows = (fieldBounds, rowSignatureGroups, rows, baseRequiredFields, flags) => {
    const configs = (rowSignatureGroups || []).flatMap((group) => group.flat());
    const fieldSource = fieldBounds ? fieldBounds : configs;
    const fieldNames = getFieldNames(fieldSource);
    const fieldTypes = getFieldTypes(fieldSource);
    const numStandardFields = getNumStandardFields(fieldSource);
    const numNumberFields = fieldTypes.filter((fieldType) => fieldType === graphql_types_1.AdemFieldType.NUMBER).length;
    // Require at least 3 fields to lessen likelihood of random rows being picked up
    const minRequiredFields = Math.max(3, Math.min(numStandardFields, numNumberFields + 1));
    // Resolves one row to the list of field names (or nulls) its blocks matched.
    const matchRowToFields = (row) => {
        if (fieldBounds) {
            return getMatchedFieldsForTemplate(row, fieldBounds, flags);
        }
        if (rowSignatureGroups) {
            return getMatchedFieldsForConfigs(row, configs, flags);
        }
        throw new Error("Either fieldBounds or rowSignatureGroups must be provided");
    };
    // Decides eligibility from the raw match list of a single row.
    const isEligible = (rowMatches) => {
        const matchedFields = _.uniq(rowMatches.filter(utils_2.isNotNullAndNotUndefined));
        const endsRow = (0, utils_1.containsEndOfRow)(fieldTypes, baseRequiredFields, fieldNames, matchedFields);
        const isValidPartialRow = endsRow && matchedFields.length >= minRequiredFields;
        const hasAllRequiredFields = baseRequiredFields.every((field) => matchedFields.includes(field));
        return hasAllRequiredFields || isValidPartialRow;
    };
    const matchesPerRow = rows.map((row) => matchRowToFields(row));
    const eligibility = matchesPerRow.map((rowMatches) => isEligible(rowMatches));
    return rows.filter((row, index) => eligibility[index]);
};
exports.filterEligibleCommissionRows = filterEligibleCommissionRows;
/** Returns the `fieldName` of every item, in input order. */
const getFieldNames = (items) => {
    return items.map((item) => item.fieldName);
};
/** Returns, for every item in input order, the result of `getFieldType` applied to its `field`. */
const getFieldTypes = (items) => {
    return items.map((item) => {
        const itemField = item.field;
        return (0, utils_1.getFieldType)(itemField);
    });
};
/** Counts the items whose `field` is anything other than `AdemField.CUSTOM`. */
const getNumStandardFields = (items) => {
    return items.reduce((count, item) => (item.field !== graphql_types_1.AdemField.CUSTOM ? count + 1 : count), 0);
};
/**
 * Maps every block of a row to the name of the first template field it lines
 * up with, or to null when no field matches.
 *
 * A block lines up with a field when its left edge, horizontal midpoint or
 * right edge is within `DEFAULT_OFFSET_ERROR_THRESHOLD` of the field's `xMin`,
 * `xMid` or `xMax` respectively, and `isValidMatchedBlockText` accepts the
 * block text for the field's type.
 *
 * @param row Blocks with `boundingBox` ({ x, width }) and `text`.
 * @param fieldBounds Entries with `field`, `fieldName`, `xMin`, `xMid`, `xMax`.
 * @param flags Passed through unchanged to `isValidMatchedBlockText`.
 * @returns One field name or null per block, in block order.
 */
const getMatchedFieldsForTemplate = (row, fieldBounds, flags) => {
    const isWithinTolerance = (actual, expected) => Math.abs(actual - expected) < constants_1.DEFAULT_OFFSET_ERROR_THRESHOLD;
    return row.map((block) => {
        const left = block.boundingBox.x;
        const right = left + block.boundingBox.width;
        const center = (left + right) / 2;
        const matchedBound = fieldBounds.find((candidate) => {
            const { field, fieldName } = candidate;
            const bounds = __rest(candidate, ["field", "fieldName"]);
            const isAligned = isWithinTolerance(left, bounds.xMin) ||
                isWithinTolerance(center, bounds.xMid) ||
                isWithinTolerance(right, bounds.xMax);
            // The text check only runs for candidates that are positionally aligned.
            const isMatch = isAligned &&
                (0, utils_1.isValidMatchedBlockText)((0, utils_1.getFieldType)(field), block.text, flags);
            return isMatch ? fieldName : null;
        });
        if (!matchedBound) {
            return null;
        }
        return matchedBound.fieldName;
    });
};
/**
 * Tests every block of a row against every field config and returns the
 * results as one flat list (block-major: all configs for the first block, then
 * all configs for the second block, and so on).
 *
 * A config yields its `fieldName` when its `startBound` is a position boundary,
 * the block edge selected by the config's alignment (left edge, midpoint or
 * right edge) is within `DEFAULT_OFFSET_ERROR_THRESHOLD` of `positionX`, and
 * `isValidMatch` accepts the block. In every other case it yields null.
 *
 * @param row Blocks with `boundingBox` ({ x, width }).
 * @param configs Entries with `field`, `fieldName` and `fieldMatchConfig`.
 * @param flags Passed through unchanged to `isValidMatch`.
 * @returns `row.length * configs.length` entries of field name or null.
 */
const getMatchedFieldsForConfigs = (row, configs, flags) => row.flatMap((block) => {
    const left = block.boundingBox.x;
    const right = left + block.boundingBox.width;
    const center = (left + right) / 2;
    // Selects the block coordinate an alignment is anchored to; unknown
    // alignments give undefined, which can never satisfy the tolerance check.
    const anchorFor = (alignment) => {
        switch (alignment) {
            case graphql_types_1.AdemFieldAlignment.LEFT:
                return left;
            case graphql_types_1.AdemFieldAlignment.CENTER:
                return center;
            case graphql_types_1.AdemFieldAlignment.RIGHT:
                return right;
            default:
                return undefined;
        }
    };
    return configs.map(({ field, fieldName, fieldMatchConfig }) => {
        if (!(0, utils_1.isPositionBoundaryConfig)(fieldMatchConfig.startBound)) {
            return null;
        }
        const positionX = fieldMatchConfig.startBound.positionX;
        const anchor = anchorFor(fieldMatchConfig.alignment);
        const isAligned = Math.abs(anchor - positionX) < constants_1.DEFAULT_OFFSET_ERROR_THRESHOLD;
        if (!isAligned) {
            return null;
        }
        return (0, utils_1.isValidMatch)(row, [block], field, (0, utils_1.getFieldType)(field), flags)
            ? fieldName
            : null;
    });
});
/**
 * Derives a bounding box for a whole row from its first and last blocks.
 *
 * The origin is the first block's `Geometry.BoundingBox` (`Left`, `Top`); the
 * end point is the second polygon point (`Geometry.Polygon[1]`) of the last
 * block. NOTE(review): presumably `Polygon[1]` is the top-right corner, which
 * would make `height` the vertical drift of the row's top edge rather than the
 * row's visual height — confirm against the Textract geometry ordering.
 *
 * Missing geometry is tolerated for every coordinate: the affected values
 * become `undefined` / `NaN` instead of throwing. (Previously only the end Y
 * coordinate threw when the polygon had fewer than two points.)
 *
 * @param row Non-empty array of Textract blocks.
 * @returns `{ x, y, width, height }` for the row.
 */
const getBoundingBoxFromContainingRow = (row) => {
    const firstGeometry = row[0].Geometry;
    const lastGeometry = row[row.length - 1].Geometry;
    const startBox = firstGeometry == null ? undefined : firstGeometry.BoundingBox;
    const endPolygon = lastGeometry == null ? undefined : lastGeometry.Polygon;
    const endPoint = endPolygon == null ? undefined : endPolygon[1];
    const rowStartX = startBox == null ? undefined : startBox.Left;
    const rowStartY = startBox == null ? undefined : startBox.Top;
    // Both end coordinates share the same guard so X and Y degrade identically.
    const rowEndX = endPoint == null ? undefined : endPoint.X;
    const rowEndY = endPoint == null ? undefined : endPoint.Y;
    return {
        x: rowStartX,
        y: rowStartY,
        width: rowEndX - rowStartX,
        height: rowEndY - rowStartY,
    };
};
/**
 * Computes, per page, horizontal bounds from the TABLE blocks on that page.
 *
 * For each page that has at least one TABLE block the result holds:
 *  - `xMin`: the smallest `BoundingBox.Left` among the page's tables;
 *  - `xMax`: the largest `Left + Width` among the page's tables;
 *  - `width`: the `Width` of the table with the smallest width.
 *    NOTE(review): this is the narrowest table (minBy), not the widest —
 *    confirm that is intended.
 *
 * Each value falls back to 1 when it cannot be determined. `xMax` and `width`
 * also fall back to 1 when they evaluate to 0. `xMin` keeps a legitimate value
 * of 0 (a table starting at the left page edge); it used to be replaced by 1.
 *
 * @param blocks Textract blocks of the document.
 * @returns Object keyed by page whose values are `{ xMin, xMax, width }`.
 */
const getPageTableBounds = (blocks) => {
    // Null-safe accessors for the optional geometry of a (possibly missing) block.
    const boxOf = (block) => block == null || block.Geometry == null ? undefined : block.Geometry.BoundingBox;
    const leftOf = (block) => {
        const box = boxOf(block);
        return box == null ? undefined : box.Left;
    };
    const widthOf = (block) => {
        const box = boxOf(block);
        return box == null ? undefined : box.Width;
    };
    const tableBlocks = blocks.filter((block) => block.BlockType === "TABLE");
    const tablesByPage = _.groupBy(tableBlocks, "Page");
    return Object.fromEntries(Object.entries(tablesByPage).map(([page, pageTableBlocks]) => {
        const xMinBlock = _.minBy(pageTableBlocks, (block) => leftOf(block));
        const xMaxBlock = _.maxBy(pageTableBlocks, (block) => leftOf(block) + widthOf(block));
        const narrowestBlock = _.minBy(pageTableBlocks, (block) => widthOf(block));
        const minLeft = leftOf(xMinBlock);
        return [
            page,
            {
                // Only a missing value falls back; 0 is a valid left edge.
                xMin: minLeft == null ? 1 : minLeft,
                xMax: leftOf(xMaxBlock) + widthOf(xMaxBlock) || 1,
                width: widthOf(narrowestBlock) || 1,
            },
        ];
    }));
};
/**
 * Returns the `BoundingBox.Width` of the widest table selected by
 * `getDocumentIdentifierHeaderTables`, or 1 when no such table exists or its
 * width is missing or zero.
 *
 * @param blocks Textract blocks of the document.
 * @param documentIdentifierHeaders Header texts forwarded to the table lookup.
 * @returns The widest matching table width, defaulting to 1.
 */
const getMaxTableWidth = (blocks, documentIdentifierHeaders) => {
    const widthOf = (block) => {
        const geometry = block.Geometry;
        const box = geometry == null ? undefined : geometry.BoundingBox;
        return box == null ? undefined : box.Width;
    };
    const blocksById = _.keyBy(blocks, "Id");
    const candidateTables = getDocumentIdentifierHeaderTables(blocks, blocksById, documentIdentifierHeaders);
    const widestTable = _.maxBy(candidateTables, (block) => widthOf(block));
    const widestWidth = widestTable == null ? undefined : widthOf(widestTable);
    return widestWidth || 1;
};
/**
 * Selects the TABLE blocks whose column headers contain every given header
 * text.
 *
 * For each table, the texts of the children of its CELL children tagged
 * `COLUMN_HEADER` are concatenated (no separator). The table is kept when that
 * string includes every entry of `documentIdentifierHeaders`.
 *
 * Tables with no child relationships or no `Id` are excluded. (The guard used
 * to `return []` from the filter predicate, which is truthy and therefore kept
 * exactly the tables it was meant to skip.) Child ids that are missing from
 * `blocksById` are ignored rather than dereferenced.
 *
 * @param blocks Textract blocks of the document.
 * @param blocksById Lookup from block Id to block.
 * @param documentIdentifierHeaders Header texts that must all be present.
 * @returns The matching TABLE blocks, in document order.
 */
const getDocumentIdentifierHeaderTables = (blocks, blocksById, documentIdentifierHeaders) => {
    const isColumnHeaderCell = (block) => block != null &&
        block.BlockType === "CELL" &&
        block.EntityTypes != null &&
        block.EntityTypes.includes("COLUMN_HEADER");
    const tableBlocks = blocks.filter((block) => block.BlockType === "TABLE");
    return tableBlocks.filter((tableBlock) => {
        const childIds = (0, blockUtils_1.getChildRelationshipIds)(tableBlock);
        if (childIds === undefined || !tableBlock.Id) {
            return false;
        }
        const tableHeadersStr = childIds
            .map((id) => blocksById[id])
            .filter((block) => isColumnHeaderCell(block))
            .map((cell) => (0, blockUtils_1.getChildRelationshipIds)(cell))
            .filter(utils_2.isNotNullAndNotUndefined)
            .flat()
            .map((childId) => blocksById[childId])
            // Skip ids that do not resolve to a known block.
            .filter(utils_2.isNotNullAndNotUndefined)
            .map((block) => block.Text)
            .join("");
        return documentIdentifierHeaders.every((header) => tableHeadersStr.includes(header));
    });
};
/**
 * Converts a Textract block into the normalized "bounded block" shape used by
 * the row-matching code.
 *
 * Normalization steps:
 *  - `offsetX`: the page's `xMin` from `pageBounds` (0 when unknown) is
 *    subtracted from the block's left edge, unless
 *    `disableRowOffsetNormalization` is set.
 *  - `offsetY`: `(rowEndY - rowBoundingBox.y) * (blockX / rowEndX)` is
 *    subtracted from the block's top, unless
 *    `disableRotationOffsetNormalization` is set.
 *  - `rowWidth`: x and width are divided by the page's `xMax - xMin` when
 *    `enablePageBasisRowOffsetNormalization` is set, otherwise by 1 when
 *    `disablePageScaleNormalization` is set, otherwise by `maxTableWidth`.
 *    If the page-basis width is 0 (no bounds known for the page, or
 *    `xMax === xMin`) the non-page-basis width is used instead, so the
 *    coordinates are never divided by zero.
 *  - `y` additionally has the page number added. NOTE(review): presumably so
 *    that y keeps increasing across page breaks — confirm.
 *
 * @param rowBoundingBox `{ x, y, width, height }` of the containing row.
 * @param block Textract block (`Id`, `Text`, `Page`, `Geometry.BoundingBox`).
 * @param rowIndex Index of the containing row; copied to the result.
 * @param options Normalization switches plus `pageBounds` (default `{}`) and
 *   `maxTableWidth` (default 1).
 * @returns The bounded block with its normalized `boundingBox` and the offsets
 *   and width that were applied.
 */
const textractBlockToBoundedBlock = (rowBoundingBox, block, rowIndex, { disableRowOffsetNormalization, disableRotationOffsetNormalization, disablePageScaleNormalization, enablePageBasisRowOffsetNormalization, pageBounds = {}, maxTableWidth = 1, }) => {
    const rowEndX = rowBoundingBox.x + rowBoundingBox.width;
    const rowEndY = rowBoundingBox.y + rowBoundingBox.height;
    const page = block.Page;
    const boundsForPage = pageBounds[page];
    const pageXMin = (boundsForPage == null ? undefined : boundsForPage.xMin) || 0;
    const pageXMax = (boundsForPage == null ? undefined : boundsForPage.xMax) || 0;
    const blockBox = block.Geometry == null ? undefined : block.Geometry.BoundingBox;
    const blockX = blockBox == null ? undefined : blockBox.Left;
    const blockY = blockBox == null ? undefined : blockBox.Top;
    const width = blockBox == null ? undefined : blockBox.Width;
    const height = blockBox == null ? undefined : blockBox.Height;
    const offsetY = disableRotationOffsetNormalization
        ? 0
        : (rowEndY - rowBoundingBox.y) * (blockX / rowEndX);
    const offsetX = disableRowOffsetNormalization ? 0 : pageXMin;
    const pageWidth = pageXMax - pageXMin;
    const nonPageBasisWidth = disablePageScaleNormalization ? 1 : maxTableWidth;
    // A zero page width would turn x and width into Infinity/NaN below.
    const rowWidth = enablePageBasisRowOffsetNormalization && pageWidth !== 0
        ? pageWidth
        : nonPageBasisWidth;
    return {
        id: block.Id,
        text: block.Text || "",
        boundingBox: {
            x: (blockX - offsetX) / rowWidth,
            y: blockY - offsetY + page,
            width: width / rowWidth,
            height,
        },
        offsetX,
        offsetY,
        rowIndex,
        rowWidth,
        page,
    };
};
