import { lte, gte, between, lt } from "modules/utils/functions";

import {
  DnaAssayQcFieldKey,
  OncologyAssayQcField,
  RnaAssayQcFieldKey,
} from "./types";

/**
 * Display and validation metadata for the DNA assay QC metrics.
 *
 * Each entry names the metric key (`field`), the `label` and `description`
 * text for it, the number of `decimalPlaces`, and an optional `unit`.
 * Some entries additionally carry a `rule` (built with the `lt` / `lte` /
 * `gte` / `between` helpers) together with a `warningMessage`.
 *
 * NOTE(review): presumably `rule` is a predicate the metric value must
 * satisfy and `warningMessage` is what gets shown when it does not — confirm
 * against the consumer of this config.
 *
 * NOTE(review): several messages say "below N" while the rule is `lte(N)`,
 * which presumably also accepts a value of exactly N — confirm the intended
 * boundary.
 */
export const DNA_ASSAY_QC_FIELDS_CONFIG: OncologyAssayQcField<DnaAssayQcFieldKey>[] =
  [
    {
      label: "Duplicates",
      description:
        "The % of reads that are discarded due to being duplicate copies of other read pairs.",
      field: "duplicates_pct",
      decimalPlaces: 1,
      warningMessage:
        "The Congenica system recommends duplicate percentages of less than 80%.",
      rule: lt(80),
      unit: "%",
    },
    {
      // Informational only: no rule / warning is configured for this metric.
      label: "Unique Sequence Generated",
      description:
        "The amount of sequencing data left for analysis after duplicates have been discarded.",
      field: "unique_sequence_generated_mb",
      decimalPlaces: 0,
      unit: "Mb",
    },
    {
      label: "Unmapped",
      description:
        "The % of reads that do not map to the reference genome used for analysis. These may be due to contamination with adapter sequences, non-human DNA such as viruses or PhiX, human DNA sequences that are not currently represented in the genome or other factors.",
      field: "unmapped_pct",
      decimalPlaces: 2,
      warningMessage:
        "The Congenica system requires unmapped read percentages to be below 2%.",
      rule: lte(2),
      unit: "%",
    },
    {
      label: "Discordant",
      description:
        "The % of read pairs that are structurally different to the reference genome, for example due to chimeras formed during library preparation, structural polymorphisms, somatic structural variants (SVs) or other factors.",
      field: "discordant_pct",
      decimalPlaces: 2,
      warningMessage:
        "The Congenica system requires discordant read percentages to be below 3%.",
      rule: lte(3),
      unit: "%",
    },
    {
      // Informational only: no rule / warning is configured for this metric.
      label: "Non Reference",
      description:
        "The % of bases that do not match the reference genome, for example due to PCR or sequencing errors, polymorphisms, mis-mappings, mutations or other factors.",
      field: "non_reference_pct",
      decimalPlaces: 2,
      unit: "%",
    },
    {
      label: "Average Read Length",
      description:
        "The average length of sequencing reads in the input file in base pairs.",
      field: "average_read_length_bp",
      decimalPlaces: 0,
      rule: between(74, 76),
      warningMessage:
        "The Congenica system supports average read lengths between 74 and 76 base pairs.",
      unit: "bp",
    },
    {
      label: "Average Insert Size",
      description:
        "The average insert size in the sequenced library in base pairs.",
      field: "average_insert_size_bp",
      decimalPlaces: 0,
      // No `rule` is set here: this check is special because it compares the
      // average insert size against the read length (the insert size should be
      // at least twice the read length) rather than against a fixed threshold.
      // It is meant to be calculated in
      // `perl/sapientia-frontend/src/modules/qc/components/oncology.tsx`
      // (in oncologySecondaryPipelineDetails).
      // TODO: it's not actually calculated there - do we need to?
      warningMessage:
        "The Congenica system requires the library insert size to be at least twice the read length.",
      unit: "bp",
    },
    {
      label: "Standard Deviation of the Insert Size",
      description:
        "The standard deviation of the insert size in the sequenced library in base pairs.",
      field: "st_dev_insert_size_bp",
      decimalPlaces: 0,
      warningMessage:
        "The Congenica system requires the standard deviation of the library insert size to be below 100bp.",
      rule: lte(100),
      unit: "bp",
    },
    {
      // Informational only: no rule / warning is configured for this metric.
      label: "On Target",
      description:
        "The % of reads that map to the regions targeted by the sequence capture assay. Note this is calculated after duplicates are removed.",
      field: "on_target_pct",
      decimalPlaces: 0,
      unit: "%",
    },
    {
      // Informational only: no rule / warning is configured for this metric.
      label: "Design Size",
      description: "The footprint of the sequence capture assay.",
      field: "design_size_mb",
      decimalPlaces: 1,
      unit: "Mb",
    },
    {
      // No `unit` is configured for this metric (the message refers to "100x").
      label: "Average Depth",
      description:
        "The average coverage across the entire footprint of the sequence capture assay. Genes that do not meet required coverage thresholds are displayed individually in the section below.",
      field: "avg_depth",
      decimalPlaces: 0,
      warningMessage:
        "The Congenica system requires an average depth across the design of at least 100x.",
      rule: gte(100),
    },
  ];

/**
 * Keys of the groups into which the RNA assay QC fields are partitioned.
 * Used as the key type of `RNA_ASSAY_QC_FIELDS_CONFIG`, so every category
 * listed here must have an entry in that record.
 */
type RNA_ASSAY_QC_FIELDS_CATEGORIES =
  | "GENERAL_FIELDS"
  | "UNIQUE_READS_FIELDS"
  | "MULTI_MAPPING_READS_FIELDS"
  | "UNMAPPED_READS_FIELDS"
  | "CHIMERIC_READS_FIELDS";

/**
 * Display and validation metadata for the RNA assay QC metrics, grouped by
 * category.
 *
 * Each category has an optional `heading` and a list of `fields`. A field
 * names the metric key (`field`), its `label`, the number of `decimalPlaces`
 * and an optional `unit`; unlike the DNA config, RNA fields carry no
 * `description`. Some fields additionally carry a `rule` (built with the
 * `gte` / `lte` helpers) together with a `warningMessage`.
 *
 * NOTE(review): presumably `rule` is a predicate the metric value must
 * satisfy and `warningMessage` is what gets shown when it does not — confirm
 * against the consumer of this config.
 *
 * NOTE(review): several messages say "below N" while the rule is `lte(N)`,
 * which presumably also accepts a value of exactly N — confirm the intended
 * boundary.
 */
export const RNA_ASSAY_QC_FIELDS_CONFIG: Record<
  RNA_ASSAY_QC_FIELDS_CATEGORIES,
  {
    heading?: string;
    fields: Omit<OncologyAssayQcField<RnaAssayQcFieldKey>, "description">[];
  }
> = {
  // The general group is the only one without a heading.
  GENERAL_FIELDS: {
    fields: [
      {
        label: "Number of input reads",
        field: "input_reads_number",
        decimalPlaces: 0,
      },
      {
        label: "Average input read length",
        field: "avg_input_read_length",
        decimalPlaces: 0,
        unit: "bp",
      },
    ],
  },
  UNIQUE_READS_FIELDS: {
    heading: "Unique reads",
    fields: [
      {
        label: "Uniquely mapped reads",
        field: "uniquely_mapped_reads_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires uniquely mapped reads percentage to be at least 70%.",
        rule: gte(70),
        unit: "%",
      },
      {
        label: "Average mapped length",
        field: "avg_mapped_length",
        decimalPlaces: 0,
        unit: "bp",
      },
      {
        // Label fixed: "splicing event" -> "splicing events".
        label: "Total number of splicing events in the input reads",
        field: "splices_total_number",
        decimalPlaces: 0,
      },
      {
        label: "Splicing events: annotated",
        field: "splices_annotated_sjdb_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires annotated splicing events percentage to be at least 80%.",
        rule: gte(80),
        unit: "%",
      },
      {
        label: "Splicing events: GT/AG",
        field: "splices_gt_ag_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires GT/AG splicing events percentage to be at least 98%.",
        rule: gte(98),
        unit: "%",
      },
      {
        label: "Splicing events: GC/AG",
        field: "splices_gc_ag_pct",
        decimalPlaces: 2,
        unit: "%",
      },
      {
        label: "Splicing events: AT/AC",
        field: "splices_at_ac_pct",
        decimalPlaces: 2,
        unit: "%",
      },
      {
        label: "Splicing events: Non-canonical",
        field: "splices_non_canonical_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires non canonical splicing events percentage to be below 5%.",
        rule: lte(5),
        unit: "%",
      },
      {
        label: "Mismatch rate per base",
        field: "mismatch_rate_per_base_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires mismatch rate per base percentage to be below 2%.",
        rule: lte(2),
        unit: "%",
      },
      {
        label: "Deletion rate per base",
        // NOTE(review): unlike the sibling rate fields this key has no `_pct`
        // suffix although the unit is "%" — confirm against RnaAssayQcFieldKey.
        field: "deletion_rate_per_base",
        decimalPlaces: 2,
        // Message fixed: "delete rate" -> "deletion rate", matching the label.
        warningMessage:
          "The Congenica system requires deletion rate per base percentage to be below 0.5%.",
        rule: lte(0.5),
        unit: "%",
      },
      {
        label: "Deletion average length",
        field: "deletion_avg_length",
        decimalPlaces: 2,
        unit: "bp",
      },
      {
        label: "Insertion rate per base",
        field: "insertion_rate_per_base_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires insertion rate per base percentage to be below 0.5%.",
        rule: lte(0.5),
        unit: "%",
      },
      {
        label: "Insertion average length",
        field: "insertion_avg_length",
        decimalPlaces: 2,
        unit: "bp",
      },
    ],
  },
  MULTI_MAPPING_READS_FIELDS: {
    heading: "Multi-mapping reads",
    fields: [
      {
        label: "Reads mapped to multiple loci",
        field: "reads_mapped_to_multiple_loci_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires reads mapped to multiple loci percentage to be below 10%.",
        rule: lte(10),
        unit: "%",
      },
      {
        label: "Reads mapped to too many loci",
        field: "reads_mapped_to_too_many_loci_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires reads mapped to too many loci percentage to be below 5%.",
        rule: lte(5),
        unit: "%",
      },
    ],
  },
  UNMAPPED_READS_FIELDS: {
    heading: "Unmapped reads",
    fields: [
      {
        label: "Reads unmapped: too many mismatches",
        field: "reads_unmapped_too_many_mismatches_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires reads unmapped due to too many mismatches percentage to be below 1.5%.",
        rule: lte(1.5),
        unit: "%",
      },
      {
        label: "Reads unmapped: too short",
        field: "reads_unmapped_too_short_pct",
        decimalPlaces: 2,
        unit: "%",
      },
      {
        label: "Reads unmapped: other",
        field: "reads_unmapped_other_pct",
        decimalPlaces: 2,
        unit: "%",
      },
    ],
  },
  CHIMERIC_READS_FIELDS: {
    heading: "Chimeric reads",
    fields: [
      {
        label: "Chimeric reads",
        field: "chimeric_reads_pct",
        decimalPlaces: 2,
        warningMessage:
          "The Congenica system requires chimeric reads percentage to be below 3%.",
        rule: lte(3),
        unit: "%",
      },
    ],
  },
};
