######## snakemake preamble start (automatically inserted, do not edit) ########
import sys; sys.path.extend(['/home/aloes/miniconda3/envs/seqneut-pipeline/lib/python3.12/site-packages', '/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline', '/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax', '/home/aloes/miniconda3/envs/seqneut-pipeline/bin', '/home/aloes/miniconda3/envs/seqneut-pipeline/lib/python3.12', '/home/aloes/miniconda3/envs/seqneut-pipeline/lib/python3.12/lib-dynload', '/home/aloes/miniconda3/envs/seqneut-pipeline/lib/python3.12/site-packages', '/home/aloes/.cache/snakemake/snakemake/source-cache/runtime-cache/tmpzg9btu6h/file/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks', '/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks']); import pickle; snakemake = pickle.loads(b'\x80\x04\x95pQ\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8c,results/barcode_counts/plate14lib_none-1.csv\x94\x8c,results/barcode_counts/plate14lib_none-2.csv\x94\x8c6results/barcode_counts/plate14lib_fulllibconc-1_60.csv\x94\x8c6results/barcode_counts/plate14lib_fulllibconc-2_60.csv\x94\x8c7results/barcode_counts/plate14lib_fulllibconc-1_180.csv\x94\x8c7results/barcode_counts/plate14lib_fulllibconc-2_180.csv\x94\x8c7results/barcode_counts/plate14lib_fulllibconc-1_540.csv\x94\x8c7results/barcode_counts/plate14lib_fulllibconc-2_540.csv\x94\x8c8results/barcode_counts/plate14lib_fulllibconc-1_1620.csv\x94\x8c8results/barcode_counts/plate14lib_fulllibconc-2_1620.csv\x94\x8c8results/barcode_counts/plate14lib_fulllibconc-1_4860.csv\x94\x8c8results/barcode_counts/plate14lib_fulllibconc-2_4860.csv\x94\x8c9results/barcode_counts/plate14lib_fulllibconc-1_14580.csv\x94\x8c9results/barcode_counts/plate14lib_fulllibconc-2_14580.cs
v\x94\x8c9results/barcode_counts/plate14lib_fulllibconc-1_43740.csv\x94\x8c9results/barcode_counts/plate14lib_fulllibconc-2_43740.csv\x94\x8c:results/barcode_counts/plate14lib_fulllibconc-1_131220.csv\x94\x8c:results/barcode_counts/plate14lib_fulllibconc-2_131220.csv\x94\x8c:results/barcode_counts/plate14lib_fulllibconc-1_393660.csv\x94\x8c:results/barcode_counts/plate14lib_fulllibconc-2_393660.csv\x94\x8c;results/barcode_counts/plate14lib_fulllibconc-1_1180980.csv\x94\x8c;results/barcode_counts/plate14lib_fulllibconc-2_1180980.csv\x94\x8c,results/barcode_counts/plate14lib_none-3.csv\x94\x8c,results/barcode_counts/plate14lib_none-4.csv\x94\x8c+results/barcode_fates/plate14lib_none-1.csv\x94\x8c+results/barcode_fates/plate14lib_none-2.csv\x94\x8c5results/barcode_fates/plate14lib_fulllibconc-1_60.csv\x94\x8c5results/barcode_fates/plate14lib_fulllibconc-2_60.csv\x94\x8c6results/barcode_fates/plate14lib_fulllibconc-1_180.csv\x94\x8c6results/barcode_fates/plate14lib_fulllibconc-2_180.csv\x94\x8c6results/barcode_fates/plate14lib_fulllibconc-1_540.csv\x94\x8c6results/barcode_fates/plate14lib_fulllibconc-2_540.csv\x94\x8c7results/barcode_fates/plate14lib_fulllibconc-1_1620.csv\x94\x8c7results/barcode_fates/plate14lib_fulllibconc-2_1620.csv\x94\x8c7results/barcode_fates/plate14lib_fulllibconc-1_4860.csv\x94\x8c7results/barcode_fates/plate14lib_fulllibconc-2_4860.csv\x94\x8c8results/barcode_fates/plate14lib_fulllibconc-1_14580.csv\x94\x8c8results/barcode_fates/plate14lib_fulllibconc-2_14580.csv\x94\x8c8results/barcode_fates/plate14lib_fulllibconc-1_43740.csv\x94\x8c8results/barcode_fates/plate14lib_fulllibconc-2_43740.csv\x94\x8c9results/barcode_fates/plate14lib_fulllibconc-1_131220.csv\x94\x8c9results/barcode_fates/plate14lib_fulllibconc-2_131220.csv\x94\x8c9results/barcode_fates/plate14lib_fulllibconc-1_393660.csv\x94\x8c9results/barcode_fates/plate14lib_fulllibconc-2_393660.csv\x94\x8c:results/barcode_fates/plate14lib_fulllibconc-1_1180980.csv\x94\x8c:results/barcode_fates
/plate14lib_fulllibconc-2_1180980.csv\x94\x8c+results/barcode_fates/plate14lib_none-3.csv\x94\x8c+results/barcode_fates/plate14lib_none-4.csv\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94e}\x94(\x8c\x06_names\x94}\x94(\x8c\ncount_csvs\x94K\x00K\x18\x86\x94\x8c\tfate_csvs\x94K\x18K0\x86\x94\x8c\x11viral_library_csv\x94K0N\x86\x94\x8c\x15neut_standard_set_csv\x94K1N\x86\x94u\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94ehI\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(hO)}\x94\x8c\x05_name\x94hIsNt\x94bhJhMhO\x85\x94R\x94(hO)}\x94hShJsNt\x94bh?h\x06\x8c\tNamedlist\x94\x93\x94)\x81\x94(h\nh\x0bh\x0ch\rh\x0eh\x0fh\x10h\x11h\x12h\x13h\x14h\x15h\x16h\x17h\x18h\x19h\x1ah\x1bh\x1ch\x1dh\x1eh\x1fh h!e}\x94(h=}\x94hG]\x94(hIhJehIhMhO\x85\x94R\x94(hO)}\x94hShIsNt\x94bhJhMhO\x85\x94R\x94(hO)}\x94hShJsNt\x94bubhAhZ)\x81\x94(h"h#h$h%h&h\'h(h)h*h+h,h-h.h/h0h1h2h3h4h5h6h7h8h9e}\x94(h=}\x94hG]\x94(hIhJehIhMhO\x85\x94R\x94(hO)}\x94hShIsNt\x94bhJhMhO\x85\x94R\x94(hO)}\x94hShJsNt\x94bubhCh:hEh;ub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8c&results/plates/plate14lib/qc_drops.yml\x94\x8c.results/plates/plate14lib/frac_infectivity.csv\x94\x8c\'results/plates/plate14lib/curvefits.csv\x94\x8c*results/plates/plate14lib/curvefits.pickle\x94e}\x94(h=}\x94(\x8c\x08qc_drops\x94K\x00N\x86\x94\x8c\x14frac_infectivity_csv\x94K\x01N\x86\x94\x8c\x08fits_csv\x94K\x02N\x86\x94\x8c\x0bfits_pickle\x94K\x03N\x86\x94uhG]\x94(hIhJehIhMhO\x85\x94R\x94(hO)}\x94hShIsNt\x94bhJhMhO\x85\x94R\x94(hO)}\x94hShJsNt\x94bh}hwh\x7fhxh\x81hyh\x83hzub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(]\x94(\x8c\x11plate14lib_none-1\x94\x8c\x11plate14lib_none-2\x94\x8c\x1bplate14lib_fulllibconc-1_60\x94\x8c\x1bplate14lib_fulllibconc-2_60\x94\x8c\x1cplate14lib_fulllibconc-1_180\x94\x8c\x1cplate14lib_fulllibconc-2_180\x94\x8c\x1cplate14li
b_fulllibconc-1_540\x94\x8c\x1cplate14lib_fulllibconc-2_540\x94\x8c\x1dplate14lib_fulllibconc-1_1620\x94\x8c\x1dplate14lib_fulllibconc-2_1620\x94\x8c\x1dplate14lib_fulllibconc-1_4860\x94\x8c\x1dplate14lib_fulllibconc-2_4860\x94\x8c\x1eplate14lib_fulllibconc-1_14580\x94\x8c\x1eplate14lib_fulllibconc-2_14580\x94\x8c\x1eplate14lib_fulllibconc-1_43740\x94\x8c\x1eplate14lib_fulllibconc-2_43740\x94\x8c\x1fplate14lib_fulllibconc-1_131220\x94\x8c\x1fplate14lib_fulllibconc-2_131220\x94\x8c\x1fplate14lib_fulllibconc-1_393660\x94\x8c\x1fplate14lib_fulllibconc-2_393660\x94\x8c plate14lib_fulllibconc-1_1180980\x94\x8c plate14lib_fulllibconc-2_1180980\x94\x8c\x11plate14lib_none-3\x94\x8c\x11plate14lib_none-4\x94e}\x94(\x8c\x05group\x94\x8c\nValidation\x94\x8c\x04date\x94\x8c\n2023-08-07\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c&data/plates/plate14fulllib_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(\x8c\x1bavg_barcode_counts_per_well\x94M\xe8\x03\x8c\x1fmin_neut_standard_frac_per_well\x94G?tz\xe1G\xae\x14{\x8c"no_serum_per_viral_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?@bM\xd2\xf1\xa9\xfc\x8c\x0fmax_fold_change\x94K\x04\x8c\tmax_wells\x94K\x02u\x8c!per_neut_standard_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?tz\xe1G\xae\x14{\x8c\x0fmax_fold_change\x94K\x04\x8c\tmax_wells\x94K\x02u\x8c 
min_neut_standard_count_per_well\x94M\xe8\x03\x8c)min_no_serum_count_per_viral_barcode_well\x94M\xf4\x01\x8c+max_frac_infectivity_per_viral_barcode_well\x94K\x05\x8c)min_dilutions_per_barcode_serum_replicate\x94K\x06u\x8c\x0fcurvefit_params\x94}\x94(\x8c\x18frac_infectivity_ceiling\x94K\x01\x8c\x06fixtop\x94]\x94(G?\xe0\x00\x00\x00\x00\x00\x00K\x01e\x8c\tfixbottom\x94K\x00\x8c\x08fixslope\x94]\x94(G?\xe9\x99\x99\x99\x99\x99\x9aK\neu\x8c\x0bcurvefit_qc\x94}\x94(\x8c\x1dmax_frac_infectivity_at_least\x94G?\xe0\x00\x00\x00\x00\x00\x00\x8c\x0fgoodness_of_fit\x94}\x94(\x8c\x06min_R2\x94G?\xe3333333\x8c\x08max_RMSD\x94G?\xb9\x99\x99\x99\x99\x99\x9au\x8c#serum_replicates_ignore_curvefit_qc\x94]\x94\x8c+barcode_serum_replicates_ignore_curvefit_qc\x94]\x94u\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\x08upstream\x94\x8c\x1cCCTACAATGTCGGATTTGTATTTAATAG\x94\x8c\ndownstream\x94\x8c\x00\x94\x8c\x04minq\x94K\x14\x8c\x11upstream_mismatch\x94K\x04\x8c\x0ebc_orientation\x94\x8c\x02R2\x94\x8c\tupstream2\x94\x8c\x06GCTACA\x94\x8c\x12upstream2_mismatch\x94K\x01u\x8c\x07samples\x94}\x94(\x8c\x04well\x94}\x94(K\x00\x8c\x02G1\x94K\x01\x8c\x02H1\x94K\x02\x8c\x02G2\x94K\x03\x8c\x02H2\x94K\x04\x8c\x02G3\x94K\x05\x8c\x02H3\x94K\x06\x8c\x02G4\x94K\x07\x8c\x02H4\x94K\x08\x8c\x02G5\x94K\t\x8c\x02H5\x94K\n\x8c\x02G6\x94K\x0b\x8c\x02H6\x94K\x0c\x8c\x02G7\x94K\r\x8c\x02H7\x94K\x0e\x8c\x02G8\x94K\x0f\x8c\x02H8\x94K\x10\x8c\x02G9\x94K\x11\x8c\x02H9\x94K\x12\x8c\x03G10\x94K\x13\x8c\x03H10\x94K\x14\x8c\x03G11\x94K\x15\x8c\x03H11\x94K\x16\x8c\x03G12\x94K\x17\x8c\x03H12\x94u\x8c\x05serum\x94}\x94(K\x00\x8c\x04none\x94K\x01j\x08\x01\x00\x00K\x02\x8c\x0bfulllibconc\x94K\x03j\t\x01\x00\x00K\x04j\t\x01\x00\x00K\x05j\t\x01\x00\x00K\x06j\t\x01\x00\x00K\x07j\t\x01\x00\x00K\x08j\t\x01\x00\x00K\tj\t\x01\x00\x00K\nj\t\x01\x00\x00K\x0bj\t\x01\x00\x00K\x0cj\t\x01\x00\x00K\rj\t\x01\x00\x00K\x0ej\t\x01\x00\x00K\x0fj\t\x01\x00\x00K\x10j\t\x01\x00\x00K\x11j\t\x01\x00\x00K\x12j\t\x01\x00\x00K\x13j\t\x01\x00\x00K\x
14j\t\x01\x00\x00K\x15j\t\x01\x00\x00K\x16j\x08\x01\x00\x00K\x17j\x08\x01\x00\x00u\x8c\x0fdilution_factor\x94}\x94(K\x00NK\x01NK\x02K<K\x03K<K\x04K\xb4K\x05K\xb4K\x06M\x1c\x02K\x07M\x1c\x02K\x08MT\x06K\tMT\x06K\nM\xfc\x12K\x0bM\xfc\x12K\x0cM\xf48K\rM\xf48K\x0eM\xdc\xaaK\x0fM\xdc\xaaK\x10J\x94\x00\x02\x00K\x11J\x94\x00\x02\x00K\x12J\xbc\x01\x06\x00K\x13J\xbc\x01\x06\x00K\x14J4\x05\x12\x00K\x15J4\x05\x12\x00K\x16NK\x17Nu\x8c\treplicate\x94}\x94(K\x00K\x01K\x01K\x02K\x02K\x01K\x03K\x02K\x04K\x01K\x05K\x02K\x06K\x01K\x07K\x02K\x08K\x01K\tK\x02K\nK\x01K\x0bK\x02K\x0cK\x01K\rK\x02K\x0eK\x01K\x0fK\x02K\x10K\x01K\x11K\x02K\x12K\x01K\x13K\x02K\x14K\x01K\x15K\x02K\x16K\x03K\x17K\x04u\x8c\x05fastq\x94}\x94(K\x00\x8cx/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_7_S11_R1_001.fastq.gz\x94K\x01\x8cx/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_8_S12_R1_001.fastq.gz\x94K\x02\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_15_S19_R1_001.fastq.gz\x94K\x03\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_16_S20_R1_001.fastq.gz\x94K\x04\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_23_S27_R1_001.fastq.gz\x94K\x05\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_24_S28_R1_001.fastq.gz\x94K\x06\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_31_S35_R1_001.fastq.gz\x94K\x07\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_32_S36_R1_001.fastq.gz\x94K\x08\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_39_S43_R1_001.fastq.gz\x94K\t\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unalig
ned/Project_aloes/PlateB_40_S44_R1_001.fastq.gz\x94K\n\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_47_S51_R1_001.fastq.gz\x94K\x0b\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_48_S52_R1_001.fastq.gz\x94K\x0c\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_55_S59_R1_001.fastq.gz\x94K\r\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_56_S60_R1_001.fastq.gz\x94K\x0e\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_63_S67_R1_001.fastq.gz\x94K\x0f\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_64_S68_R1_001.fastq.gz\x94K\x10\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_71_S75_R1_001.fastq.gz\x94K\x11\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_72_S76_R1_001.fastq.gz\x94K\x12\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_79_S83_R1_001.fastq.gz\x94K\x13\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_80_S84_R1_001.fastq.gz\x94K\x14\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_87_S91_R1_001.fastq.gz\x94K\x15\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_88_S92_R1_001.fastq.gz\x94K\x16\x8cy/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_95_S99_R1_001.fastq.gz\x94K\x17\x8cz/fh/fast/bloom_j/SR/ngs/illumina/aloes/240805_VH00319_539_AAG3FFNM5/Unaligned/Project_aloes/PlateB_96_S100_R1_001.fastq.gz\x94u\x8c\x0fserum_replicate\x94}\x94(K\x00\x8c\x06none-1\x94K\x01
\x8c\x06none-2\x94K\x02\x8c\rfulllibconc-1\x94K\x03\x8c\rfulllibconc-2\x94K\x04\x8c\rfulllibconc-1\x94K\x05\x8c\rfulllibconc-2\x94K\x06\x8c\rfulllibconc-1\x94K\x07\x8c\rfulllibconc-2\x94K\x08\x8c\rfulllibconc-1\x94K\t\x8c\rfulllibconc-2\x94K\n\x8c\rfulllibconc-1\x94K\x0b\x8c\rfulllibconc-2\x94K\x0c\x8c\rfulllibconc-1\x94K\r\x8c\rfulllibconc-2\x94K\x0e\x8c\rfulllibconc-1\x94K\x0f\x8c\rfulllibconc-2\x94K\x10\x8c\rfulllibconc-1\x94K\x11\x8c\rfulllibconc-2\x94K\x12\x8c\rfulllibconc-1\x94K\x13\x8c\rfulllibconc-2\x94K\x14\x8c\rfulllibconc-1\x94K\x15\x8c\rfulllibconc-2\x94K\x16\x8c\x06none-3\x94K\x17\x8c\x06none-4\x94u\x8c\x0esample_noplate\x94}\x94(K\x00j*\x01\x00\x00K\x01j+\x01\x00\x00K\x02\x8c\x10fulllibconc-1_60\x94K\x03\x8c\x10fulllibconc-2_60\x94K\x04\x8c\x11fulllibconc-1_180\x94K\x05\x8c\x11fulllibconc-2_180\x94K\x06\x8c\x11fulllibconc-1_540\x94K\x07\x8c\x11fulllibconc-2_540\x94K\x08\x8c\x12fulllibconc-1_1620\x94K\t\x8c\x12fulllibconc-2_1620\x94K\n\x8c\x12fulllibconc-1_4860\x94K\x0b\x8c\x12fulllibconc-2_4860\x94K\x0c\x8c\x13fulllibconc-1_14580\x94K\r\x8c\x13fulllibconc-2_14580\x94K\x0e\x8c\x13fulllibconc-1_43740\x94K\x0f\x8c\x13fulllibconc-2_43740\x94K\x10\x8c\x14fulllibconc-1_131220\x94K\x11\x8c\x14fulllibconc-2_131220\x94K\x12\x8c\x14fulllibconc-1_393660\x94K\x13\x8c\x14fulllibconc-2_393660\x94K\x14\x8c\x15fulllibconc-1_1180980\x94K\x15\x8c\x15fulllibconc-2_1180980\x94K\x16j@\x01\x00\x00K\x17jA\x01\x00\x00u\x8c\x06sample\x94}\x94(K\x00h\x93K\x01h\x94K\x02h\x95K\x03h\x96K\x04h\x97K\x05h\x98K\x06h\x99K\x07h\x9aK\x08h\x9bK\th\x9cK\nh\x9dK\x0bh\x9eK\x0ch\x9fK\rh\xa0K\x0eh\xa1K\x0fh\xa2K\x10h\xa3K\x11h\xa4K\x12h\xa5K\x13h\xa6K\x14h\xa7K\x15h\xa8K\x16h\xa9K\x17h\xaau\x8c\x05plate\x94}\x94(K\x00\x8c\nplate14lib\x94K\x01j\\\x01\x00\x00K\x02j\\\x01\x00\x00K\x03j\\\x01\x00\x00K\x04j\\\x01\x00\x00K\x05j\\\x01\x00\x00K\x06j\\\x01\x00\x00K\x07j\\\x01\x00\x00K\x08j\\\x01\x00\x00K\tj\\\x01\x00\x00K\nj\\\x01\x00\x00K\x0bj\\\x01\x00\x00K\x0cj\\\x01\x00\x00K\rj\\\x01\x00\x00K\x0ej\
\\x01\x00\x00K\x0fj\\\x01\x00\x00K\x10j\\\x01\x00\x00K\x11j\\\x01\x00\x00K\x12j\\\x01\x00\x00K\x13j\\\x01\x00\x00K\x14j\\\x01\x00\x00K\x15j\\\x01\x00\x00K\x16j\\\x01\x00\x00K\x17j\\\x01\x00\x00u\x8c\x0fplate_replicate\x94}\x94(K\x00\x8c\x0cplate14lib-1\x94K\x01\x8c\x0cplate14lib-2\x94K\x02\x8c\x0cplate14lib-1\x94K\x03\x8c\x0cplate14lib-2\x94K\x04\x8c\x0cplate14lib-1\x94K\x05\x8c\x0cplate14lib-2\x94K\x06\x8c\x0cplate14lib-1\x94K\x07\x8c\x0cplate14lib-2\x94K\x08\x8c\x0cplate14lib-1\x94K\t\x8c\x0cplate14lib-2\x94K\n\x8c\x0cplate14lib-1\x94K\x0b\x8c\x0cplate14lib-2\x94K\x0c\x8c\x0cplate14lib-1\x94K\r\x8c\x0cplate14lib-2\x94K\x0e\x8c\x0cplate14lib-1\x94K\x0f\x8c\x0cplate14lib-2\x94K\x10\x8c\x0cplate14lib-1\x94K\x11\x8c\x0cplate14lib-2\x94K\x12\x8c\x0cplate14lib-1\x94K\x13\x8c\x0cplate14lib-2\x94K\x14\x8c\x0cplate14lib-1\x94K\x15\x8c\x0cplate14lib-2\x94K\x16\x8c\x0cplate14lib-3\x94K\x17\x8c\x0cplate14lib-4\x94uuue}\x94(h=}\x94(h\xeaK\x00N\x86\x94\x8c\x0cplate_params\x94K\x01N\x86\x94uhG]\x94(hIhJehIhMhO\x85\x94R\x94(hO)}\x94hShIsNt\x94bhJhMhO\x85\x94R\x94(hO)}\x94hShJsNt\x94bh\xeah\x92jz\x01\x00\x00h\xabub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94\x8c\nplate14lib\x94a}\x94(h=}\x94\x8c\x05plate\x94K\x00N\x86\x94shG]\x94(hIhJehIhMhO\x85\x94R\x94(hO)}\x94hShIsNt\x94bhJhMhO\x85\x94R\x94(hO)}\x94hShJsNt\x94bjZ\x01\x00\x00j\x89\x01\x00\x00ub\x8c\x07threads\x94K\x01\x8c\tresources\x94h\x06\x8c\tResources\x94\x93\x94)\x81\x94(K\x01K\x01\x8c\x15/loc/scratch/58619952\x94e}\x94(h=}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uhG]\x94(hIhJehIhMhO\x85\x94R\x94(hO)}\x94hShIsNt\x94bhJhMhO\x85\x94R\x94(hO)}\x94hShJsNt\x94bj\x9f\x01\x00\x00K\x01j\xa1\x01\x00\x00K\x01j\xa3\x01\x00\x00j\x9c\x01\x00\x00ub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c2results/plates/plate14lib/process_plate14lib.ipynb\x94a}\x94(h=}\x94\x8c\x08notebook\x94K\x00N\x86\x94shG]\x94(hIhJehIhMhO\x85\x94R\x94(hO)}\x94hShIsNt\x94bhJhMh
O\x85\x94R\x94(hO)}\x94hShJsNt\x94bj\xb5\x01\x00\x00j\xb2\x01\x00\x00ub\x8c\x06config\x94}\x94(\x8c\x10seqneut-pipeline\x94\x8c\x10seqneut-pipeline\x94\x8c\x04docs\x94\x8c\x04docs\x94\x8c\x0bdescription\x94X\xfa\x01\x00\x00# Sequencing-based neutralization assays of 2021-2022 DRIVE samples versus H1N1 influenza libraries\nStudy by Loes et al of samples from the DRIVE cohort using sequencing-based neutralization assay developed in the Bloom lab.\n\nSee [Loes et al (2024)](https://doi.org/10.1101/2024.03.08.584176) for the citation for this study.\n\nThe numerical data and computer code are at [https://github.com/jbloomlab/flu_seqneut_DRIVE_2021-22_repeat_vax](https://github.com/jbloomlab/flu_seqneut_DRIVE_2021-22_repeat_vax)\n\x94\x8c\x0fviral_libraries\x94}\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94s\x8c\x0finitial_pooling\x94\x8c4data/initialpool/2022_pdmH1N1library_initialPool.csv\x94\x8c\x17viral_strain_plot_order\x94\x8c data/viral_strain_plot_order.csv\x94\x8c\x12neut_standard_sets\x94}\x94\x8c\x08loes2023\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94s\x8c\x1eillumina_barcode_parser_params\x94}\x94(h\xdfh\xe0h\xe1h\xe2h\xe3K\x14h\xe4K\x04h\xe5h\xe6u\x8c#default_process_plate_qc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c%default_process_plate_curvefit_params\x94}\x94(h\xccK\x01h\xcd]\x94(G?\xe0\x00\x00\x00\x00\x00\x00K\x01eh\xcfK\x00h\xd0]\x94(G?\xe9\x99\x99\x99\x99\x99\x9aK\neu\x8c!default_process_plate_curvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9]\x94h\xdb]\x94u\x8c\x06plates\x94}\x94(\x8c\x06plate1\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94\x8c\x08datetime\x94\x8c\x04date\x94\x93\x94C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\
x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate1_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x06plate2\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate2_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x06plate3\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x02\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate3_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G
\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x06plate4\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x02\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate4_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x06plate5\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x04\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate5_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10TCTGTTCCGGCCCGAA\x94\x8c\nD10042d182\x94eas\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\
x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x06plate6\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x04\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate6_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94(\x8c\rbarcode_wells\x94]\x94]\x94(\x8c\x10TAATGAGCTTTATGGT\x94\x8c\x02F5\x94ea\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10ACGACATGATCAAACG\x94\x8c\nD10212d182\x94eau\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x06plate7\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x05\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate7_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x06plate8\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04
date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x05\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate8_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94(\x8c\rbarcode_wells\x94]\x94(]\x94(\x8c\x10TAGTATAATAGAGCAG\x94\x8c\x02D5\x94e]\x94(\x8c\x10CAGTTCTGCGACCAGC\x94\x8c\x02D9\x94ee\x8c\x18barcode_serum_replicates\x94]\x94(]\x94(\x8c\x10ACGGAATCCCCTGAGA\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10GGATAAGAAAACTACT\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10GTAACATTATACGATT\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10GACTCAATAATCACAC\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10CTATTAATCATGCAAA\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10TGGAATCGTCACCGAT\x94\x8c\tD10396d30\x94eeu\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x06plate9\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x05\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate9_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10CGGATAAAAATGATAT\x94\x8c\tD10417d30\x94eas\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd
\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x07plate10\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x06\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate10_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94(]\x94(\x8c\x10CGGATAAAAATGATAT\x94\x8c\tD10041d30\x94e]\x94(\x8c\x10GTTTGACAATCACTAC\x94\x8c\tD10041d30\x94e]\x94(\x8c\x10AGCAGCCTGAAAATAT\x94\x8c\tD10175d30\x94e]\x94(\x8c\x10GACTCAATAATCACAC\x94\x8c\nD10175d182\x94ees\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x07plate11\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\t\x1a\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate11_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94(]\x94(\x8c\x10ACGGAATCCCCTGAGA\x94\x8c\tD10041d30\x94e]\x94(\x8c\x10GATCCGTACTTTGATT\x94\x8c\x08D10256d0\x94e]\x94(\x8c\x10CATCAACCGCCATTTC\x94\x8c\x08D10256d0\x94ees\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe
8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uu\x8c\x07plate13\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x0c\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate13_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uuj\\\x01\x00\x00}\x94(h\xach\xadh\xaej\xec\x01\x00\x00C\x04\x07\xe7\x08\x07\x94\x85\x94R\x94h\xb0h\xb1h\xb2h\xb3h\xb4h\xb5h\xb6}\x94h\xb8}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06uh\xca}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00uh\xd2}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00uh\xdd}\x94(h\xe7h\xe8h\xe9K\x01uu\x8c\x0eplate14halflib\x94}\x94(\x8c\x05group\x94\x8c\nValidation\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x07\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsa
mples_csv\x94\x8c\'data/plates/plate14fhalflib_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00u\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\tupstream2\x94\x8c\x06GCTACA\x94\x8c\x12upstream2_mismatch\x94K\x01uu\x8c\x0cplate14no5a1\x94}\x94(\x8c\x05group\x94\x8c\nValidation\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x07\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c$data/plates/plate14no5a1_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00u\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\tupstream2\x94\x8c\x06GCTACA\x94\x8c\x12upstream2_mismatch\x94K\x01uu\x8c\x0cplate14no5a2\x94}\x94(\x8c\x05group\x94\x8c\nValidation\x94\x8c\x04date\x94j\xec\x01\x00\x00C\x04\x07\xe7\x08\x07\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c$data/plates/plate14no5a2_samples.csv\x94\x8c\x0cmanual_dro
ps\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xbaM\xe8\x03h\xbbG?tz\xe1G\xae\x14{h\xbc}\x94(h\xbeG?@bM\xd2\xf1\xa9\xfch\xbfK\x04h\xc0K\x02uh\xc1}\x94(h\xc3G?tz\xe1G\xae\x14{h\xc4K\x04h\xc5K\x02uh\xc6M\xe8\x03h\xc7M\xf4\x01h\xc8K\x05h\xc9K\x06u\x8c\x0fcurvefit_params\x94}\x94(h\xccK\x01h\xcdj\xdc\x01\x00\x00h\xcfK\x00h\xd0j\xdd\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(h\xd4G?\xe0\x00\x00\x00\x00\x00\x00h\xd5}\x94(h\xd7G?\xe3333333h\xd8G?\xb9\x99\x99\x99\x99\x99\x9auh\xd9j\xe1\x01\x00\x00h\xdbj\xe2\x01\x00\x00u\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\tupstream2\x94\x8c\x06GCTACA\x94\x8c\x12upstream2_mismatch\x94K\x01uuu\x8c\x16default_serum_titer_as\x94\x8c\x08midpoint\x94\x8c\x1bdefault_serum_qc_thresholds\x94}\x94(\x8c\x0emin_replicates\x94K\x02\x8c\x1bmax_fold_change_from_median\x94K\n\x8c\x11viruses_ignore_qc\x94]\x94u\x8c\x16sera_override_defaults\x94}\x94u\x8c\x04rule\x94\x8c\rprocess_plate\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8cs/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; os.chdir(r'/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax');
######## snakemake preamble end #########
Process plate counts to get fraction infectivities and fit curves
This notebook is designed to be run using snakemake, and analyzes a plate of sequencing-based neutralization assays.
The plots generated by this notebook are interactive, so you can mouseover points for details, use the mouse-scroll to zoom and pan, and use interactive dropdowns at the bottom of the plots.
Setup
Import Python modules:
import pickle
import sys
import altair as alt
import matplotlib.pyplot as plt
import neutcurve
import numpy
import pandas as pd
import ruamel.yaml as yaml
_ = alt.data_transformers.disable_max_rows()
Get the variables passed by snakemake:
# Input files, output files, parameters and wildcards supplied by snakemake.
count_csvs, fate_csvs = snakemake.input.count_csvs, snakemake.input.fate_csvs
viral_library_csv = snakemake.input.viral_library_csv
neut_standard_set_csv = snakemake.input.neut_standard_set_csv
qc_drops_yaml = snakemake.output.qc_drops
frac_infectivity_csv = snakemake.output.frac_infectivity_csv
fits_csv, fits_pickle = snakemake.output.fits_csv, snakemake.output.fits_pickle
samples = snakemake.params.samples
plate = snakemake.wildcards.plate
plate_params = snakemake.params.plate_params

# Manual drops: entries given as lists are turned into tuples, anything else is
# kept as it is.
manual_drops = {
    drop_kind: [tuple(entry) if isinstance(entry, list) else entry for entry in entries]
    for drop_kind, entries in plate_params["manual_drops"].items()
}

# Remaining plate-level configuration.
group = plate_params["group"]
qc_thresholds = plate_params["qc_thresholds"]
curvefit_params = plate_params["curvefit_params"]
curvefit_qc = plate_params["curvefit_qc"]
# Same list -> tuple conversion for the (barcode, serum-replicate) pairs that are
# exempt from curve-fit QC; note this rewrites the entry of `curvefit_qc` in place.
curvefit_qc["barcode_serum_replicates_ignore_curvefit_qc"] = list(
    map(tuple, curvefit_qc["barcode_serum_replicates_ignore_curvefit_qc"])
)

print(f"Processing {plate=}")

samples_df = pd.DataFrame(plate_params["samples"])
print(f"\nPlate has {len(samples)} samples (wells)")

# Sanity checks on the sample sheet: the identifying columns must be unique per
# row, every (serum_replicate, dilution_factor) pair must occur exactly once, and
# there must be one counts file and one fates file per sample.
assert all(
    samples_df[unique_col].nunique() == len(samples_df)
    for unique_col in ("well", "sample", "sample_noplate")
)
assert len(samples_df.groupby(["serum_replicate", "dilution_factor"])) == len(
    samples_df
)
assert len(samples) == len(count_csvs) == len(fate_csvs) == len(samples_df)

# Echo the configuration that applies to this plate as YAML.
for d, key, title in (
    (manual_drops, "manual_drops", "Data manually specified to drop:"),
    (qc_thresholds, "qc_thresholds", "QC thresholds applied to data:"),
    (curvefit_params, "curvefit_params", "Curve-fitting parameters:"),
    (curvefit_qc, "curvefit_qc", "Curve-fitting QC:"),
):
    print(f"\n{title}")
    yaml.YAML(typ="rt").dump({key: d}, stream=sys.stdout)
Processing plate='plate14lib' Plate has 24 samples (wells) Data manually specified to drop: manual_drops: {}
QC thresholds applied to data: qc_thresholds: avg_barcode_counts_per_well: 1000 min_neut_standard_frac_per_well: 0.005 no_serum_per_viral_barcode_filters: min_frac: 0.0005 max_fold_change: 4 max_wells: 2 per_neut_standard_barcode_filters: min_frac: 0.005 max_fold_change: 4 max_wells: 2 min_neut_standard_count_per_well: 1000 min_no_serum_count_per_viral_barcode_well: 500 max_frac_infectivity_per_viral_barcode_well: 5 min_dilutions_per_barcode_serum_replicate: 6
Curve-fitting parameters: curvefit_params: frac_infectivity_ceiling: 1 fixtop: - 0.5 - 1 fixbottom: 0 fixslope: - 0.8 - 10
Curve-fitting QC: curvefit_qc: max_frac_infectivity_at_least: 0.5 goodness_of_fit: min_R2: 0.6 max_RMSD: 0.1 serum_replicates_ignore_curvefit_qc: [] barcode_serum_replicates_ignore_curvefit_qc: []
Set up dictionary to keep track of wells, barcodes, well-barcodes, and serum-replicates that are dropped:
# One dictionary per kind of thing that can be dropped; each maps the dropped
# item to the name of the filter that dropped it.
qc_drops = {
    drop_kind: {}
    for drop_kind in (
        "wells",
        "barcodes",
        "barcode_wells",
        "barcode_serum_replicates",
        "serum_replicates",
    )
}

# Manual drops may only use the drop kinds tracked above.
assert set(manual_drops) <= set(qc_drops), f"{manual_drops.keys()=}, {qc_drops.keys()}"
Statistics on barcode-parsing for each sample¶
Make interactive chart of the "fates" of the sequencing reads parsed for each sample on the plate.
If most sequencing reads are not "valid barcodes", this could potentially indicate some problem in the sequencing or barcode set you are parsing.
Potential fates are:
- valid barcode: barcode that matches a known virus or neutralization standard, we hope most reads are this.
- invalid barcode: a barcode with proper flanking sequences, but that does not match a known virus or neutralization standard. If you have a lot of reads of this type, it is probably a good idea to look at the invalid barcode CSVs (in the ./results/barcode_invalid/ subdirectory created by the pipeline) to see what these invalid barcodes are.
- unparseable barcode: could not parse a barcode from this read as there was not a sequence of the correct length with the appropriate flanking sequence.
- invalid outer flank: if using an outer upstream or downstream region (upstream2 or downstream2 for the illuminabarcodeparser), reads that are otherwise valid except for this outer flank. Typically you would be using upstream2 if you have a plate index embedded in your primer, and reads with this classification correspond to a different index than the one for this plate.
- low quality barcode: low-quality or N nucleotides in barcode, could indicate a problem with sequencing.
- failed chastity filter: reads that failed the Illumina chastity filter, if these are reported in the FASTQ (they may not be).
Also, if the number of reads per sample is very uneven, that could indicate that you did not do a good job of balancing the different samples in the Illumina sequencing.
# Read the per-sample read-fate tables, tag each with its sample, and attach the
# sample sheet.
fate_frames = [
    pd.read_csv(fate_csv).assign(sample=sample_name)
    for fate_csv, sample_name in zip(fate_csvs, samples)
]
fates = pd.concat(fate_frames).merge(samples_df, validate="many_to_one", on="sample")
fates["fate_counts"] = fates.groupby("fate")["count"].transform("sum")
fates["sample_well"] = fates["sample_noplate"] + " (" + fates["well"] + ")"
# only keep fates with at least one count
fates = fates.loc[
    fates["fate_counts"] > 0,
    ["fate", "count", "well", "serum_replicate", "sample_well", "dilution_factor"],
]
assert len(fates.drop_duplicates()) == len(fates)

# Orderings reused by later plots in the notebook.
serum_replicates = sorted(fates["serum_replicate"].unique())
sample_wells = list(
    fates.sort_values(["serum_replicate", "dilution_factor"])["sample_well"]
)

# Dropdown that restricts a plot to one serum replicate (or shows all of them).
serum_dropdown = alt.binding_select(
    options=[None] + serum_replicates,
    labels=["all"] + serum_replicates,
    name="serum",
)
serum_selection = alt.selection_point(fields=["serum_replicate"], bind=serum_dropdown)

# Stacked horizontal bars: one bar per sample, colored by read fate.
fate_color_order = sorted(fates["fate"].unique(), reverse=True)
fates_chart = (
    alt.Chart(fates)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X("count", scale=alt.Scale(nice=False, padding=3)),
        alt.Y("sample_well", title=None, sort=sample_wells),
        alt.Color("fate", sort=fate_color_order),
        alt.Order("fate", sort="descending"),
        tooltip=fates.columns.tolist(),
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=200,
        title=f"Barcode parsing for {plate}",
    )
    .configure_axis(grid=False)
)

fates_chart
Read barcode counts and apply manually specified drops¶
Read the counts per barcode:
# Per-sample barcode counts, tagged with their sample and joined to the sample
# sheet; bookkeeping columns that are not used downstream are removed.
count_frames = [
    pd.read_csv(count_csv).assign(sample=sample_name)
    for count_csv, sample_name in zip(count_csvs, samples)
]
counts = (
    pd.concat(count_frames)
    .merge(samples_df, validate="many_to_one", on="sample")
    .drop(columns=["replicate", "plate", "fastq"])
)
counts = counts.assign(
    sample_well=counts["sample_noplate"] + " (" + counts["well"] + ")"
)

# Every barcode is either a viral barcode (with a strain) or a
# neutralization-standard barcode (no strain).
viral_barcodes = pd.read_csv(viral_library_csv)[["barcode", "strain"]].assign(
    neut_standard=False,
)
neut_standard_barcodes = pd.read_csv(neut_standard_set_csv)[["barcode"]].assign(
    neut_standard=True,
    strain=pd.NA,
)
barcode_class = pd.concat([viral_barcodes, neut_standard_barcodes], ignore_index=True)

# The counted barcodes must be exactly the classified ones before merging.
assert set(counts["barcode"]) == set(barcode_class["barcode"])
counts = counts.merge(barcode_class, on="barcode", validate="many_to_one")

# The counts must cover the same wells and serum replicates as the fates table.
assert set(counts["sample_well"]) == set(sample_wells)
assert set(counts["serum_replicate"]) == set(serum_replicates)
Apply any manually specified data drops:
# Columns of `counts` that together identify an item of each drop type. A drop
# type with two columns is keyed by the tuple of those column values, in this
# order.
manual_drop_key_columns = {
    "wells": ["well"],
    "barcodes": ["barcode"],
    # The drop type is plural but the column is singular; without this entry the
    # generic fallback below would look for a "serum_replicates" column and fail.
    "serum_replicates": ["serum_replicate"],
    "barcode_wells": ["barcode", "well"],
    "barcode_serum_replicates": ["barcode", "serum_replicate"],
}

for filter_type, filter_drops in manual_drops.items():
    print(f"\nDropping {len(filter_drops)} {filter_type} specified in manual_drops")
    assert filter_type in qc_drops
    # record the reason for each manually dropped item
    qc_drops[filter_type].update(
        {w: "manual_drop" for w in filter_drops if not isinstance(w, list)}
    )

    # Fall back to a column named like the drop type for any type not listed above.
    key_columns = manual_drop_key_columns.get(filter_type, [filter_type])
    assert set(key_columns).issubset(counts.columns), f"{filter_type=}, {key_columns=}"

    if len(key_columns) == 1:
        drop_keys = counts[key_columns[0]]
    else:
        # Build one tuple per row without a row-wise `apply`.
        drop_keys = pd.Series(
            list(zip(*(counts[col] for col in key_columns))),
            index=counts.index,
            dtype=object,
        )

    # Remove everything recorded so far for this drop type.
    counts = counts[~drop_keys.isin(qc_drops[filter_type])]
Average counts per barcode in each well¶
Plot average counts per barcode. If a sample has inadequate barcode counts, it may not have good enough statistics for accurate analysis, and a QC-threshold is applied:
# Mean count per barcode in each well, flagged when below the QC threshold.
avg_barcode_counts = counts.groupby(
    ["well", "serum_replicate", "sample_well"],
    dropna=False,
    as_index=False,
).aggregate(avg_count=pd.NamedAgg("count", "mean"))
avg_barcode_counts["fails_qc"] = (
    avg_barcode_counts["avg_count"] < qc_thresholds["avg_barcode_counts_per_well"]
)

# Float columns get a compact number format in the tooltip.
avg_barcode_counts_tooltips = [
    alt.Tooltip(col, format=".3g") if avg_barcode_counts[col].dtype == float else col
    for col in avg_barcode_counts.columns
]

avg_barcode_counts_chart = (
    alt.Chart(avg_barcode_counts)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X(
            "avg_count",
            title="average barcode counts per well",
            scale=alt.Scale(nice=False, padding=3),
        ),
        alt.Y("sample_well", sort=sample_wells),
        alt.Color(
            "fails_qc",
            title=f"fails {qc_thresholds['avg_barcode_counts_per_well']=}",
            legend=alt.Legend(titleLimit=500),
        ),
        tooltip=avg_barcode_counts_tooltips,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Average barcode counts per well for {plate}",
    )
    .configure_axis(grid=False)
)

display(avg_barcode_counts_chart)

# drop wells failing QC
avg_barcode_counts_per_well_drops = list(
    avg_barcode_counts[avg_barcode_counts["fails_qc"]]["well"]
)
print(
    f"\nDropping {len(avg_barcode_counts_per_well_drops)} wells for failing "
    f"{qc_thresholds['avg_barcode_counts_per_well']=}: "
    + str(avg_barcode_counts_per_well_drops)
)
qc_drops["wells"].update(
    dict.fromkeys(avg_barcode_counts_per_well_drops, "avg_barcode_counts_per_well")
)
well_is_dropped = counts["well"].isin(qc_drops["wells"])
counts = counts[~well_is_dropped]
Dropping 0 wells for failing qc_thresholds['avg_barcode_counts_per_well']=1000: []
Fraction of counts from neutralization standard¶
Determine the fraction of counts from the neutralization standard in each sample, and make sure this fraction passes the QC threshold.
# Per well: total counts, counts from neutralization-standard barcodes, and the
# fraction of the total that the standard makes up.
neut_standard_fracs = (
    counts.assign(
        # multiplying by the 0/1 flag zeroes out the counts of viral barcodes
        neut_standard_count=counts["count"] * counts["neut_standard"].astype(int)
    )
    .groupby(
        ["well", "serum_replicate", "sample_well"],
        dropna=False,
        as_index=False,
    )
    .aggregate(
        total_count=pd.NamedAgg("count", "sum"),
        neut_standard_count=pd.NamedAgg("neut_standard_count", "sum"),
    )
)
neut_standard_fracs["neut_standard_frac"] = (
    neut_standard_fracs["neut_standard_count"] / neut_standard_fracs["total_count"]
)
neut_standard_fracs["fails_qc"] = (
    neut_standard_fracs["neut_standard_frac"]
    < qc_thresholds["min_neut_standard_frac_per_well"]
)

# Float columns get a compact number format in the tooltip.
neut_standard_fracs_tooltips = [
    alt.Tooltip(col, format=".3g") if neut_standard_fracs[col].dtype == float else col
    for col in neut_standard_fracs.columns
]

neut_standard_fracs_chart = (
    alt.Chart(neut_standard_fracs)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X(
            "neut_standard_frac",
            title="frac counts from neutralization standard per well",
            scale=alt.Scale(nice=False, padding=3),
        ),
        alt.Y("sample_well", sort=sample_wells),
        alt.Color(
            "fails_qc",
            title=f"fails {qc_thresholds['min_neut_standard_frac_per_well']=}",
            legend=alt.Legend(titleLimit=500),
        ),
        tooltip=neut_standard_fracs_tooltips,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Neutralization-standard fracs per well for {plate}",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(neut_standard_fracs_chart)

# drop wells failing QC
min_neut_standard_frac_per_well_drops = list(
    neut_standard_fracs[neut_standard_fracs["fails_qc"]]["well"]
)
print(
    f"\nDropping {len(min_neut_standard_frac_per_well_drops)} wells for failing "
    f"{qc_thresholds['min_neut_standard_frac_per_well']=}: "
    + str(min_neut_standard_frac_per_well_drops)
)
qc_drops["wells"].update(
    dict.fromkeys(
        min_neut_standard_frac_per_well_drops, "min_neut_standard_frac_per_well"
    )
)
well_is_dropped = counts["well"].isin(qc_drops["wells"])
counts = counts[~well_is_dropped]
Dropping 0 wells for failing qc_thresholds['min_neut_standard_frac_per_well']=0.005: []
Consistency and minimum fractions for barcodes¶
We examine the fraction of counts attributable to each barcode. We do this splitting the data two ways:
Looking at all viral (but not neut-standard) barcodes only for the no-serum samples (wells).
Looking at just the neut-standard barcodes for all samples (wells).
The reason is that if the experiment is set up perfectly, these fractions should be the same across all samples for each barcode. (We do not expect viral barcodes to have consistent fractions across samples that contain serum, as they will be neutralized differently depending on strain, which is why viral barcodes are only compared across the no-serum samples.)
We plot these fractions in interactive plots (you can mouseover points and zoom) so you can identify barcodes that fail the expected consistency QC thresholds.
We also make sure the barcodes meet specified QC minimum thresholds for all samples, and flag any that do not.
# Mouseover selection that highlights every point belonging to one barcode.
barcode_selection = alt.selection_point(fields=["barcode"], on="mouseover", empty=False)

# look at all samples for neut standard barcodes, or no-serum samples for all barcodes
# NOTE: `counts` is re-bound at the end of each iteration, but the groupby object is
# created once from the pre-loop frame, so both groups are taken from that frame.
for is_neut_standard, df in counts.groupby("neut_standard"):
    if is_neut_standard:
        print(
            f"\n\n{'=' * 89}\nAnalyzing neut-standard barcodes from all samples (wells)"
        )
        qc_name = "per_neut_standard_barcode_filters"
    else:
        print(f"\n\n{'=' * 89}\nAnalyzing all barcodes from no-serum samples (wells)")
        qc_name = "no_serum_per_viral_barcode_filters"
        df = df.query("serum == 'none'")

    # Fraction of this group's counts in each sample that belongs to each barcode,
    # and how far (as a fold change >= 1) that fraction is from the barcode's median
    # across samples.
    df = df.assign(
        sample_counts=lambda x: x.groupby("sample")["count"].transform("sum"),
        count_frac=lambda x: x["count"] / x["sample_counts"],
        median_count_frac=lambda x: x.groupby("barcode")["count_frac"].transform(
            "median"
        ),
        fold_change_from_median=lambda x: numpy.where(
            x["count_frac"] > x["median_count_frac"],
            x["count_frac"] / x["median_count_frac"],
            x["median_count_frac"] / x["count_frac"],
        ),
    )[
        [
            "barcode",
            "count",
            "well",
            "sample_well",
            "count_frac",
            "median_count_frac",
            "fold_change_from_median",
        ]
        + ([] if is_neut_standard else ["strain"])
    ]

    # barcode fails QC if fails in sufficient wells
    qc = qc_thresholds[qc_name]
    print(f"Apply QC {qc_name}: {qc}\n")
    fails_qc = (
        df.assign(
            # a barcode-well passes only if it meets both the minimum fraction and
            # the maximum fold change
            fails_qc=lambda x: ~(
                (x["count_frac"] >= qc["min_frac"])
                & (x["fold_change_from_median"] <= qc["max_fold_change"])
            ),
        )
        .groupby("barcode", as_index=False)
        .aggregate(n_wells_fail_qc=pd.NamedAgg("fails_qc", "sum"))
        .assign(fails_qc=lambda x: x["n_wells_fail_qc"] >= qc["max_wells"])[
            ["barcode", "fails_qc"]
        ]
    )
    df = df.merge(fails_qc, on="barcode", validate="many_to_one")

    # make chart
    evenness_chart = (
        alt.Chart(df)
        .add_params(barcode_selection)
        .encode(
            alt.X(
                "count_frac",
                title=(
                    "barcode's fraction of neut standard counts"
                    if is_neut_standard
                    else "barcode's fraction of non-neut standard counts"
                ),
                scale=alt.Scale(nice=False, padding=5),
            ),
            alt.Y("sample_well", sort=sample_wells),
            alt.Fill(
                "fails_qc",
                title=f"fails {qc_name}",
                legend=alt.Legend(titleLimit=500),
            ),
            strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
            size=alt.condition(barcode_selection, alt.value(60), alt.value(35)),
            tooltip=[
                alt.Tooltip(c, format=".2g") if df[c].dtype == float else c
                for c in df.columns
            ],
        )
        .mark_circle(fillOpacity=0.45, stroke="black", strokeOpacity=1)
        .properties(
            height=alt.Step(10),
            width=300,
            title=alt.TitleParams(
                (
                    f"{plate} all samples, neut-standard barcodes"
                    if is_neut_standard
                    else f"{plate} no-serum samples, all barcodes"
                ),
                subtitle="x-axis is zoomable (use mouse scroll/pan)",
            ),
        )
        .configure_axis(grid=False)
        .configure_legend(titleLimit=1000)
        .interactive()
    )

    display(evenness_chart)

    # drop barcodes failing QC
    barcode_drops = list(fails_qc.query("fails_qc")["barcode"])
    print(
        f"\nDropping {len(barcode_drops)} barcodes for failing {qc=}: {barcode_drops}"
    )
    # Record the filter that actually dropped the barcode. The previous hard-coded
    # reason "min_neut_standard_frac_per_well" named an unrelated well-level filter.
    qc_drops["barcodes"].update({bc: qc_name for bc in barcode_drops})
    counts = counts[~counts["barcode"].isin(qc_drops["barcodes"])]
========================================================================================= Analyzing all barcodes from no-serum samples (wells) Apply QC no_serum_per_viral_barcode_filters: {'min_frac': 0.0005, 'max_fold_change': 4, 'max_wells': 2}
Dropping 0 barcodes for failing qc={'min_frac': 0.0005, 'max_fold_change': 4, 'max_wells': 2}: [] ========================================================================================= Analyzing neut-standard barcodes from all samples (wells) Apply QC per_neut_standard_barcode_filters: {'min_frac': 0.005, 'max_fold_change': 4, 'max_wells': 2}
Dropping 0 barcodes for failing qc={'min_frac': 0.005, 'max_fold_change': 4, 'max_wells': 2}: []
Compute fraction infectivity¶
The fraction infectivity for viral barcode $v_b$ in sample $s$ is computed as: $$ F_{v_b,s} = \frac{c_{v_b,s} / \left(\sum_{n_b} c_{n_b,s}\right)}{{\rm median}_{s_0}\left[ c_{v_b,s_0} / \left(\sum_{n_b} c_{n_b,s_0}\right)\right]} $$ where
- $c_{v_b,s}$ is the counts of viral barcode $v_b$ in sample $s$.
- $\sum_{n_b} c_{n_b,s}$ is the sum of the counts for all neutralization standard barcodes $n_b$ for sample $s$.
- $c_{v_b,s_0}$ is the counts of viral barcode $v_b$ in no-serum sample $s_0$.
- $\sum_{n_b} c_{n_b,s_0}$ is the sum of the counts for all neutralization standard barcodes $n_b$ for no-serum sample $s_0$.
- ${\rm median}_{s_0}\left[ c_{v_b,s_0} / \left(\sum_{n_b} c_{n_b,s_0}\right)\right]$ is the median taken across all no-serum samples of the counts of viral barcode $v_b$ versus the total counts for all neutralization standard barcodes.
First, compute the total neutralization-standard counts for each sample (well). Plot these, and drop any wells that do not meet the QC threshold.
# Total neutralization-standard counts in each well, flagged when below the QC
# threshold.
neut_standard_counts = (
    counts.query("neut_standard")
    .groupby(
        ["well", "serum_replicate", "sample_well", "dilution_factor"],
        dropna=False,
        as_index=False,
    )
    .aggregate(neut_standard_count=pd.NamedAgg("count", "sum"))
)
neut_standard_counts["fails_qc"] = (
    neut_standard_counts["neut_standard_count"]
    < qc_thresholds["min_neut_standard_count_per_well"]
)

# Float columns get a compact number format in the tooltip.
neut_standard_counts_tooltips = [
    alt.Tooltip(col, format=".3g") if neut_standard_counts[col].dtype == float else col
    for col in neut_standard_counts.columns
]

neut_standard_counts_chart = (
    alt.Chart(neut_standard_counts)
    .add_params(serum_selection)
    .transform_filter(serum_selection)
    .encode(
        alt.X(
            "neut_standard_count",
            title="counts from neutralization standard",
            scale=alt.Scale(nice=False, padding=3),
        ),
        alt.Y("sample_well", sort=sample_wells),
        alt.Color(
            "fails_qc",
            title=f"fails {qc_thresholds['min_neut_standard_count_per_well']=}",
            legend=alt.Legend(titleLimit=500),
        ),
        tooltip=neut_standard_counts_tooltips,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(10),
        width=250,
        title=f"Neutralization-standard counts for {plate}",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(neut_standard_counts_chart)

# drop wells failing QC
min_neut_standard_count_per_well_drops = list(
    neut_standard_counts[neut_standard_counts["fails_qc"]]["well"]
)
print(
    f"\nDropping {len(min_neut_standard_count_per_well_drops)} wells for failing "
    f"{qc_thresholds['min_neut_standard_count_per_well']=}: "
    + str(min_neut_standard_count_per_well_drops)
)
qc_drops["wells"].update(
    dict.fromkeys(
        min_neut_standard_count_per_well_drops, "min_neut_standard_count_per_well"
    )
)

# Remove every dropped well from both the per-well totals and the raw counts.
neut_standard_counts = neut_standard_counts[
    ~neut_standard_counts["well"].isin(qc_drops["wells"])
]
counts = counts[~counts["well"].isin(qc_drops["wells"])]
Dropping 0 wells for failing qc_thresholds['min_neut_standard_count_per_well']=1000: []
Compute and plot the no-serum sample viral barcode counts and check if they pass the QC filters.
# Viral-barcode counts in the no-serum wells, joined to each well's total
# neutralization-standard count.
no_serum_counts = (
    counts.query("serum == 'none'")
    .query("not neut_standard")
    .merge(neut_standard_counts, validate="many_to_one")
)
no_serum_counts = no_serum_counts[
    ["barcode", "strain", "well", "sample_well", "count", "neut_standard_count"]
]
# NOTE(review): a count exactly equal to the "min" threshold is flagged as failing
# (`<=`), unlike the other minimum filters in this notebook which use `<` -- confirm
# this is intended.
no_serum_counts = no_serum_counts.assign(
    fails_qc=(
        no_serum_counts["count"]
        <= qc_thresholds["min_no_serum_count_per_viral_barcode_well"]
    ),
)

# Dropdown that restricts a plot to one virus strain (or shows all of them).
strains = sorted(no_serum_counts["strain"].unique())
strain_dropdown = alt.binding_select(
    options=[None] + strains,
    labels=["all"] + strains,
    name="virus strain",
)
strain_selection_dropdown = alt.selection_point(fields=["strain"], bind=strain_dropdown)

# make chart
no_serum_counts_chart = (
    alt.Chart(no_serum_counts)
    .add_params(barcode_selection, strain_selection_dropdown)
    .transform_filter(strain_selection_dropdown)
    .encode(
        alt.X(
            "count", title="viral barcode count", scale=alt.Scale(nice=False, padding=5)
        ),
        alt.Y("sample_well", sort=sample_wells),
        alt.Fill(
            "fails_qc",
            title=f"fails {qc_thresholds['min_no_serum_count_per_viral_barcode_well']=}",
            legend=alt.Legend(titleLimit=500),
        ),
        strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
        size=alt.condition(barcode_selection, alt.value(60), alt.value(35)),
        tooltip=no_serum_counts.columns.tolist(),
    )
    .mark_circle(fillOpacity=0.6, stroke="black", strokeOpacity=1)
    .properties(
        height=alt.Step(10),
        width=400,
        title=f"{plate} viral barcode counts in no-serum samples",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
    .interactive()
)

display(no_serum_counts_chart)

# drop barcode / wells failing QC
failing_no_serum_rows = no_serum_counts[no_serum_counts["fails_qc"]]
min_no_serum_count_per_viral_barcode_well_drops = list(
    failing_no_serum_rows[["barcode", "well"]].itertuples(index=False, name=None)
)
print(
    f"\nDropping {len(min_no_serum_count_per_viral_barcode_well_drops)} barcode-wells for failing "
    f"{qc_thresholds['min_no_serum_count_per_viral_barcode_well']=}: "
    + str(min_no_serum_count_per_viral_barcode_well_drops)
)
qc_drops["barcode_wells"].update(
    dict.fromkeys(
        min_no_serum_count_per_viral_barcode_well_drops,
        "min_no_serum_count_per_viral_barcode_well",
    )
)

# Remove every dropped (barcode, well) pair from the no-serum table and the counts.
no_serum_pair_dropped = no_serum_counts.assign(
    barcode_well=lambda frame: frame.apply(
        lambda row: (row["barcode"], row["well"]), axis=1
    )
)["barcode_well"].isin(qc_drops["barcode_wells"])
no_serum_counts = no_serum_counts[~no_serum_pair_dropped]

counts_pair_dropped = counts.assign(
    barcode_well=lambda frame: frame.apply(
        lambda row: (row["barcode"], row["well"]), axis=1
    )
)["barcode_well"].isin(qc_drops["barcode_wells"])
counts = counts[~counts_pair_dropped]
Dropping 2 barcode-wells for failing qc_thresholds['min_no_serum_count_per_viral_barcode_well']=500: [('TTGTCCCGAGACAACA', 'H1'), ('ACGGAATCCCCTGAGA', 'G12')]
Compute and plot the median ratio of viral barcode count to neut standard counts across no-serum samples. If library composition is equal, all of these values should be similar:
# For each viral barcode: median across no-serum wells of its count divided by
# that well's total neutralization-standard count.
no_serum_ratios = no_serum_counts.assign(
    ratio=no_serum_counts["count"] / no_serum_counts["neut_standard_count"]
)
median_no_serum_ratio = no_serum_ratios.groupby(
    ["barcode", "strain"], as_index=False
).aggregate(median_no_serum_ratio=pd.NamedAgg("ratio", "median"))

# Mouseover selection that highlights all barcodes of one strain.
strain_selection = alt.selection_point(fields=["strain"], on="mouseover", empty=False)

# Float columns get a compact number format in the tooltip.
median_no_serum_ratio_tooltips = [
    alt.Tooltip(col, format=".3g") if median_no_serum_ratio[col].dtype == float else col
    for col in median_no_serum_ratio.columns
]

median_no_serum_ratio_chart = (
    alt.Chart(median_no_serum_ratio)
    .add_params(strain_selection)
    .encode(
        alt.X(
            "median_no_serum_ratio",
            title="median ratio of counts",
            scale=alt.Scale(nice=False, padding=5),
        ),
        alt.Y(
            "barcode",
            sort=alt.SortField("median_no_serum_ratio", order="descending"),
            axis=alt.Axis(labelFontSize=5),
        ),
        color=alt.condition(strain_selection, alt.value("orange"), alt.value("gray")),
        tooltip=median_no_serum_ratio_tooltips,
    )
    .mark_bar(height={"band": 0.85})
    .properties(
        height=alt.Step(5),
        width=250,
        title=f"{plate} no-serum median ratio viral barcode to neut-standard barcode",
    )
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
)

display(median_no_serum_ratio_chart)
Compute the actual fraction infectivities. We compute both the raw fraction infectivities and the ones with the ceiling applied:
# Viral barcodes in serum-containing wells, joined to the barcode's median
# no-serum ratio and to the well's total neutralization-standard count.
frac_infectivity = (
    counts.query("not neut_standard")
    .query("serum != 'none'")
    .merge(median_no_serum_ratio, validate="many_to_one")
    .merge(neut_standard_counts, validate="many_to_one")
)

# Raw value: (count / neut-standard count), relative to the no-serum median ratio.
frac_infectivity["frac_infectivity_raw"] = (
    frac_infectivity["count"] / frac_infectivity["neut_standard_count"]
) / frac_infectivity["median_no_serum_ratio"]
# Same value capped from above at the configured ceiling.
frac_infectivity["frac_infectivity_ceiling"] = frac_infectivity[
    "frac_infectivity_raw"
].clip(upper=curvefit_params["frac_infectivity_ceiling"])
frac_infectivity["concentration"] = 1 / frac_infectivity["dilution_factor"]
frac_infectivity["plate_barcode"] = (
    frac_infectivity["plate_replicate"] + "-" + frac_infectivity["barcode"]
)

frac_infectivity = frac_infectivity[
    [
        "barcode",
        "plate_barcode",
        "well",
        "strain",
        "serum",
        "serum_replicate",
        "dilution_factor",
        "concentration",
        "frac_infectivity_raw",
        "frac_infectivity_ceiling",
    ]
]

# Exactly one row per serum / plate-barcode / dilution, and no missing values.
assert len(frac_infectivity) == len(
    frac_infectivity.groupby(["serum", "plate_barcode", "dilution_factor"])
)
for required_col in (
    "dilution_factor",
    "frac_infectivity_raw",
    "frac_infectivity_ceiling",
):
    assert frac_infectivity[required_col].notnull().all()
Plot the fraction infectivities, both the raw values and with the ceiling applied:
# Labels for the two chart columns (raw values vs. values with the ceiling).
ceiling_applied_labels = {
    "frac_infectivity_raw": "raw fraction infectivity",
    "frac_infectivity_ceiling": f"fraction infectivity with ceiling at {curvefit_params['frac_infectivity_ceiling']}",
}

# Long-format table for plotting: one row per barcode / well / kind of value.
# A barcode-well fails QC when its *raw* fraction infectivity exceeds the maximum.
frac_infectivity_chart_df = frac_infectivity.assign(
    fails_qc=(
        frac_infectivity["frac_infectivity_raw"]
        > qc_thresholds["max_frac_infectivity_per_viral_barcode_well"]
    ),
).melt(
    id_vars=[
        "barcode",
        "strain",
        "well",
        "serum_replicate",
        "dilution_factor",
        "fails_qc",
    ],
    value_vars=["frac_infectivity_raw", "frac_infectivity_ceiling"],
    var_name="ceiling_applied",
    value_name="frac_infectivity",
)
frac_infectivity_chart_df["ceiling_applied"] = frac_infectivity_chart_df[
    "ceiling_applied"
].map(ceiling_applied_labels)

# Float columns get a compact number format in the tooltip.
frac_infectivity_tooltips = [
    (
        alt.Tooltip(col, format=".3g")
        if frac_infectivity_chart_df[col].dtype == float
        else col
    )
    for col in frac_infectivity_chart_df.columns
]

# One line per barcode; rows are serum replicates, columns are raw vs. ceiling.
frac_infectivity_chart = (
    alt.Chart(frac_infectivity_chart_df)
    .add_params(strain_selection_dropdown, barcode_selection)
    .transform_filter(strain_selection_dropdown)
    .encode(
        alt.X(
            "dilution_factor",
            title="dilution factor",
            scale=alt.Scale(nice=False, padding=5, type="log"),
        ),
        alt.Y(
            "frac_infectivity",
            title="fraction infectivity",
            scale=alt.Scale(nice=False, padding=5),
        ),
        alt.Column(
            "ceiling_applied",
            sort="descending",
            title=None,
            header=alt.Header(labelFontSize=13, labelFontStyle="bold", labelPadding=2),
        ),
        alt.Row(
            "serum_replicate",
            title=None,
            spacing=3,
            header=alt.Header(labelFontSize=13, labelFontStyle="bold"),
        ),
        alt.Detail("barcode"),
        alt.Shape(
            "fails_qc",
            title=f"fails {qc_thresholds['max_frac_infectivity_per_viral_barcode_well']=}",
            legend=alt.Legend(titleLimit=500, orient="bottom"),
        ),
        color=alt.condition(
            barcode_selection, alt.value("black"), alt.value("MediumBlue")
        ),
        strokeWidth=alt.condition(barcode_selection, alt.value(3), alt.value(1)),
        opacity=alt.condition(barcode_selection, alt.value(1), alt.value(0.25)),
        tooltip=frac_infectivity_tooltips,
    )
    .mark_line(point=True)
    .properties(
        height=150,
        width=250,
        title=f"Fraction infectivities for {plate}",
    )
    .interactive(bind_x=False)
    .configure_axis(grid=False)
    .configure_legend(titleLimit=1000)
    .configure_point(size=50)
    .resolve_scale(x="independent", y="independent")
)

display(frac_infectivity_chart)

# drop barcode / wells failing QC
# (each failing pair appears twice in the melted table, hence the de-duplication)
failing_frac_rows = frac_infectivity_chart_df[frac_infectivity_chart_df["fails_qc"]]
max_frac_infectivity_per_viral_barcode_well_drops = list(
    failing_frac_rows[["barcode", "well"]]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)
print(
    f"\nDropping {len(max_frac_infectivity_per_viral_barcode_well_drops)} barcode-wells for failing "
    f"{qc_thresholds['max_frac_infectivity_per_viral_barcode_well']=}: "
    + str(max_frac_infectivity_per_viral_barcode_well_drops)
)
qc_drops["barcode_wells"].update(
    dict.fromkeys(
        max_frac_infectivity_per_viral_barcode_well_drops,
        "max_frac_infectivity_per_viral_barcode_well",
    )
)

# Remove every dropped (barcode, well) pair from the fraction infectivities.
frac_pair_dropped = frac_infectivity.assign(
    barcode_well=lambda frame: frame.apply(
        lambda row: (row["barcode"], row["well"]), axis=1
    )
)["barcode_well"].isin(qc_drops["barcode_wells"])
frac_infectivity = frac_infectivity[~frac_pair_dropped]
Dropping 0 barcode-wells for failing qc_thresholds['max_frac_infectivity_per_viral_barcode_well']=5: []
Check how many dilutions we have per barcode / serum-replicate:
# Number of distinct dilutions remaining for every barcode / serum-replicate,
# flagged when below the required minimum.
n_dilutions = frac_infectivity.groupby(
    ["serum_replicate", "strain", "barcode"], as_index=False
).aggregate(**{"number of dilutions": pd.NamedAgg("dilution_factor", "nunique")})
n_dilutions["fails_qc"] = (
    n_dilutions["number of dilutions"]
    < qc_thresholds["min_dilutions_per_barcode_serum_replicate"]
)

# Float columns get a compact number format in the tooltip.
n_dilutions_tooltips = [
    alt.Tooltip(col, format=".3g") if n_dilutions[col].dtype == float else col
    for col in n_dilutions.columns
]

n_dilutions_chart = (
    alt.Chart(n_dilutions)
    .add_params(barcode_selection)
    .encode(
        alt.X("number of dilutions", scale=alt.Scale(nice=False, padding=4)),
        alt.Y("strain", title=None),
        alt.Column(
            "serum_replicate",
            title=None,
            header=alt.Header(labelFontSize=12, labelFontStyle="bold", labelPadding=0),
        ),
        alt.Fill(
            "fails_qc",
            title=f"fails {qc_thresholds['min_dilutions_per_barcode_serum_replicate']=}",
            legend=alt.Legend(titleLimit=500, orient="bottom"),
        ),
        strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
        size=alt.condition(barcode_selection, alt.value(55), alt.value(35)),
        tooltip=n_dilutions_tooltips,
    )
    .mark_circle(stroke="black", strokeOpacity=1, fillOpacity=0.45)
    .properties(
        height=alt.Step(10),
        width=120,
        title=alt.TitleParams(
            "number of dilutions for each barcode for each serum-replicate", dy=-2
        ),
    )
)

display(n_dilutions_chart)

# drop barcode / serum-replicates failing QC
failing_dilution_rows = n_dilutions[n_dilutions["fails_qc"]]
min_dilutions_per_barcode_serum_replicate_drops = list(
    failing_dilution_rows[["barcode", "serum_replicate"]].itertuples(
        index=False, name=None
    )
)
print(
    f"\nDropping {len(min_dilutions_per_barcode_serum_replicate_drops)} barcode/serum-replicates for failing "
    f"{qc_thresholds['min_dilutions_per_barcode_serum_replicate']=}: "
    + str(min_dilutions_per_barcode_serum_replicate_drops)
)
qc_drops["barcode_serum_replicates"].update(
    dict.fromkeys(
        min_dilutions_per_barcode_serum_replicate_drops,
        "min_dilutions_per_barcode_serum_replicate",
    )
)

# Remove every dropped (barcode, serum-replicate) pair.
barcode_serum_replicate_dropped = frac_infectivity.assign(
    barcode_serum_replicate=lambda frame: frame.apply(
        lambda row: (row["barcode"], row["serum_replicate"]), axis=1
    )
)["barcode_serum_replicate"].isin(qc_drops["barcode_serum_replicates"])
frac_infectivity = frac_infectivity[~barcode_serum_replicate_dropped]
Dropping 0 barcode/serum-replicates for failing qc_thresholds['min_dilutions_per_barcode_serum_replicate']=6: []
Fit neutralization curves without applying QC to curves¶
First fit curves to all serum replicates, then we will apply QC on the curve fits. Note that the fitting is done to the fraction infectivities with the ceiling:
# Rename the two measurement columns to the names passed to `CurveFits` below;
# the fits use the fraction infectivity with the ceiling applied.
curvefit_input = frac_infectivity.rename(
    columns={
        "frac_infectivity_ceiling": "fraction infectivity",
        "concentration": "serum concentration",
    }
)

# One curve per barcode (replicate) for each strain (virus) and serum replicate,
# with the top / bottom / slope constraints taken from the plate configuration.
fits_noqc = neutcurve.CurveFits(
    curvefit_input,
    conc_col="serum concentration",
    fracinf_col="fraction infectivity",
    virus_col="strain",
    serum_col="serum_replicate",
    replicate_col="barcode",
    **{
        fix_param: curvefit_params[fix_param]
        for fix_param in ("fixtop", "fixbottom", "fixslope")
    },
)
Determine which fits fail the curve fitting QC, and plot them. Note the plot indicates as failing QC any barcode / serum-replicate that fails, even if we are also specified to ignore the QC for that one (so it will not be removed later):
goodness_of_fit = curvefit_qc["goodness_of_fit"]

# Largest (ceiling-applied) fraction infectivity seen for each barcode /
# serum-replicate ...
max_frac_per_curve = frac_infectivity.groupby(
    ["serum_replicate", "barcode"], as_index=False
).aggregate(max_frac_infectivity=pd.NamedAgg("frac_infectivity_ceiling", "max"))
# ... joined to the per-replicate fit statistics, with the fit columns renamed to
# this notebook's column names.
per_curve_fit_stats = fits_noqc.fitParams(average_only=False, no_average=True)[
    ["serum", "virus", "replicate", "r2", "rmsd"]
].rename(columns={"serum": "serum_replicate", "replicate": "barcode"})
fit_params_noqc = max_frac_per_curve.merge(per_curve_fit_stats, validate="one_to_one")

fit_params_noqc["fails_max_frac_infectivity_at_least"] = (
    fit_params_noqc["max_frac_infectivity"]
    < curvefit_qc["max_frac_infectivity_at_least"]
)
# goodness of fit fails only when BOTH R2 is too low and RMSD is too high
fit_params_noqc["fails_goodness_of_fit"] = (
    fit_params_noqc["r2"] < goodness_of_fit["min_R2"]
) & (fit_params_noqc["rmsd"] > goodness_of_fit["max_RMSD"])
fit_params_noqc["fails_qc"] = (
    fit_params_noqc["fails_max_frac_infectivity_at_least"]
    | fit_params_noqc["fails_goodness_of_fit"]
)
# QC is ignored when the whole serum replicate, or this specific
# (barcode, serum-replicate) pair, is listed in the curve-fit QC configuration
fit_params_noqc["ignore_qc"] = fit_params_noqc.apply(
    lambda row: (
        (row["serum_replicate"] in curvefit_qc["serum_replicates_ignore_curvefit_qc"])
        or (
            (row["barcode"], row["serum_replicate"])
            in curvefit_qc["barcode_serum_replicates_ignore_curvefit_qc"]
        )
    ),
    axis=1,
)

print(f"Plotting barcode / serum-replicates that fail {curvefit_qc=}\n")

# Float columns get a compact number format in the tooltip.
fit_params_noqc_tooltips = [
    alt.Tooltip(c, format=".3g") if fit_params_noqc[c].dtype == float else c
    for c in fit_params_noqc.columns
]

# One chart per plotted property, each faceted by serum replicate.
for prop, col in (
    ("max frac infectivity", "max_frac_infectivity"),
    ("curve fit R2", "r2"),
    ("curve fit RMSD", "rmsd"),
):
    fit_params_noqc_chart = (
        alt.Chart(fit_params_noqc)
        .add_params(barcode_selection)
        .encode(
            alt.X(col, title=prop, scale=alt.Scale(nice=False, padding=4)),
            alt.Y("virus", title=None),
            alt.Fill("fails_qc"),
            alt.Column(
                "serum_replicate",
                title=None,
                header=alt.Header(
                    labelFontSize=12, labelFontStyle="bold", labelPadding=0
                ),
            ),
            strokeWidth=alt.condition(barcode_selection, alt.value(2), alt.value(0)),
            size=alt.condition(barcode_selection, alt.value(55), alt.value(35)),
            tooltip=fit_params_noqc_tooltips,
        )
        .mark_circle(stroke="black", strokeOpacity=1, fillOpacity=0.55)
        .properties(
            height=alt.Step(10),
            width=120,
            title=alt.TitleParams(f"{prop} for each barcode serum-replicate", dy=-2),
        )
    )
    display(fit_params_noqc_chart)
Plotting barcode / serum-replicates that fail curvefit_qc={'max_frac_infectivity_at_least': 0.5, 'goodness_of_fit': {'min_R2': 0.6, 'max_RMSD': 0.1}, 'serum_replicates_ignore_curvefit_qc': [], 'barcode_serum_replicates_ignore_curvefit_qc': []}
Now get all barcode / serum-replicate pairs that fail any of the QC. Plot curves for just these virus / serum-replicates (we plot all barcodes for a virus even if just one fails QC), and then exclude any that are not specified to ignore the QC:
def _not_in_curvefit_drops(df):
    """Return a boolean mask of the rows of `df` that have not been QC-dropped.

    A row is dropped when its (barcode, serum_replicate) pair is a key of
    `qc_drops["barcode_serum_replicates"]`. The pairs are built with `zip`
    rather than a row-wise `DataFrame.apply`, and the same helper is shared by
    both frames that need filtering.
    """
    pairs = pd.Series(
        list(zip(df["barcode"], df["serum_replicate"])),
        index=df.index,
        dtype=object,
    )
    return ~pairs.isin(qc_drops["barcode_serum_replicates"])


# Every barcode / serum-replicate flagged as failing, whether or not its QC is
# configured to be ignored.
barcode_serum_replicates_fail_qc = fit_params_noqc.query("fails_qc").reset_index(
    drop=True
)

print(f"Here are barcode / serum-replicates that fail {curvefit_qc=}")
display(barcode_serum_replicates_fail_qc)

if len(barcode_serum_replicates_fail_qc):
    print("\nCurves for viruses and serum-replicates with at least one failed barcode:")
    # Plot every barcode of the affected viruses / serum-replicates, not only
    # the failing ones.
    fig, _ = fits_noqc.plotReplicates(
        sera=sorted(barcode_serum_replicates_fail_qc["serum_replicate"].unique()),
        viruses=sorted(barcode_serum_replicates_fail_qc["virus"].unique()),
        attempt_shared_legend=False,
        legendfontsize=8,
        titlesize=10,
        ticksize=10,
        ncol=6,
        draw_in_bounds=True,
    )
    display(fig)
    plt.close(fig)

# drop barcode / serum-replicates failing QC
for qc_filter in ["max_frac_infectivity_at_least", "goodness_of_fit"]:
    # pairs failing this filter whose QC is not configured to be ignored
    fits_qc_drops = list(
        fit_params_noqc.query(f"fails_{qc_filter} and (not ignore_qc)")[
            ["barcode", "serum_replicate"]
        ].itertuples(index=False, name=None)
    )
    print(
        f"\nDropping {len(fits_qc_drops)} barcode/serum-replicates for failing "
        f"{qc_filter}={curvefit_qc[qc_filter]}: " + str(fits_qc_drops)
    )
    # record the filter name as the reason for each dropped pair
    qc_drops["barcode_serum_replicates"].update(dict.fromkeys(fits_qc_drops, qc_filter))
    # Remove the dropped pairs from both frames before the next filter runs, so
    # a pair already dropped is not reported again under a later filter.
    frac_infectivity = frac_infectivity[_not_in_curvefit_drops(frac_infectivity)]
    fit_params_noqc = fit_params_noqc[_not_in_curvefit_drops(fit_params_noqc)]
Here are barcode / serum-replicates that fail curvefit_qc={'max_frac_infectivity_at_least': 0.5, 'goodness_of_fit': {'min_R2': 0.6, 'max_RMSD': 0.1}, 'serum_replicates_ignore_curvefit_qc': [], 'barcode_serum_replicates_ignore_curvefit_qc': []}
serum_replicate | barcode | max_frac_infectivity | virus | r2 | rmsd | fails_max_frac_infectivity_at_least | fails_goodness_of_fit | fails_qc | ignore_qc |
---|
Dropping 0 barcode/serum-replicates for failing max_frac_infectivity_at_least=0.5: [] Dropping 0 barcode/serum-replicates for failing goodness_of_fit={'min_R2': 0.6, 'max_RMSD': 0.1}: []
Fit neutralization curves after applying QC
Now we re-fit curves after applying all the QC:
# Same column relabeling as for the pre-QC fits, applied to the QC-filtered
# fraction infectivities.
curve_input_qc = frac_infectivity.rename(
    columns={
        "frac_infectivity_ceiling": "fraction infectivity",
        "concentration": "serum concentration",
    }
)

# Post-QC fits: curves are grouped by `serum` (not `serum_replicate`), and
# `plate_barcode` identifies each replicate.
fits_qc = neutcurve.CurveFits(
    curve_input_qc,
    conc_col="serum concentration",
    fracinf_col="fraction infectivity",
    virus_col="strain",
    serum_col="serum",
    replicate_col="plate_barcode",
    **{opt: curvefit_params[opt] for opt in ("fixtop", "fixbottom", "fixslope")},
)

fit_params_qc = fits_qc.fitParams(average_only=False, no_average=True)

# Sanity check: there cannot be more per-replicate fits after QC than before it.
_n_fits_noqc = len(fits_noqc.fitParams(average_only=False, no_average=True))
assert len(fit_params_qc) <= _n_fits_noqc

print(f"Assigning fits for this plate to {group}")
# put the group label in the first column of the fit-parameter table
fit_params_qc.insert(0, "group", group)
Assigning fits for this plate to Validation
Plot all the curves that passed QC:
# Plot options for the post-QC replicate curves.
_replicate_plot_options = {
    "attempt_shared_legend": False,
    "legendfontsize": 8,
    "titlesize": 10,
    "ticksize": 10,
    "ncol": 6,
    "draw_in_bounds": True,
}

# Only plot when at least one serum is left in the post-QC fits.
if not fits_qc.sera:
    print("No sera passed QC.")
else:
    _ = fits_qc.plotReplicates(**_replicate_plot_options)
Save results to files
print(f"Writing fraction infectivities to {frac_infectivity_csv}")
# Columns written to the fraction-infectivity CSV, in output order.
_frac_infectivity_out_cols = [
    "serum",
    "strain",
    "plate_barcode",
    "dilution_factor",
    "frac_infectivity_raw",
    "frac_infectivity_ceiling",
]
_frac_infectivity_out = frac_infectivity[_frac_infectivity_out_cols].sort_values(
    ["serum", "plate_barcode", "dilution_factor"]
)
_frac_infectivity_out.to_csv(frac_infectivity_csv, index=False, float_format="%.4g")

print(f"\nWriting fit parameters to {fits_csv}")
# `nreplicates` and `ic50_str` are left out of the saved table.
_fit_params_out = fit_params_qc.drop(columns=["nreplicates", "ic50_str"])
_fit_params_out.to_csv(fits_csv, index=False, float_format="%.4g")

print(f"\nPickling neutcurve.CurveFits object for these data to {fits_pickle}")
with open(fits_pickle, "wb") as f:
    pickle.dump(fits_qc, f)

print(f"\nWriting QC drops to {qc_drops_yaml}")
def tup_to_str(x):
    """Return `x` in a form usable as a plain YAML key.

    A tuple is flattened to one space-delimited string; each element is passed
    through `str` first so that non-string members (e.g. integers) do not raise
    `TypeError`. Any non-tuple value is returned unchanged.
    """
    return " ".join(map(str, x)) if isinstance(x, tuple) else x
# Copy the QC drops with every tuple key converted by `tup_to_str`, so the
# dumped YAML has plain keys.
qc_drops_for_yaml = {}
for _drop_category, _dropped in qc_drops.items():
    qc_drops_for_yaml[_drop_category] = {
        tup_to_str(_item): _reason for _item, _reason in _dropped.items()
    }

with open(qc_drops_yaml, "w") as f:
    yaml.YAML(typ="rt").dump(qc_drops_for_yaml, f)

# Echo the same YAML to stdout so it shows up in the notebook output.
print("\nHere are the QC drops:\n***************************")
yaml.YAML(typ="rt").dump(qc_drops_for_yaml, sys.stdout)
Writing fraction infectivities to results/plates/plate14lib/frac_infectivity.csv Writing fit parameters to results/plates/plate14lib/curvefits.csv Pickling neutcurve.CurveFits object for these data to results/plates/plate14lib/curvefits.pickle Writing QC drops to results/plates/plate14lib/qc_drops.yml Here are the QC drops: *************************** wells: {} barcodes: {} barcode_wells: TTGTCCCGAGACAACA H1: min_no_serum_count_per_viral_barcode_well ACGGAATCCCCTGAGA G12: min_no_serum_count_per_viral_barcode_well barcode_serum_replicates: {} serum_replicates: {}