######## snakemake preamble start (automatically inserted, do not edit) ########
import sys; sys.path.extend(['/home/aloes/miniconda3/envs/seqneut-pipeline/lib/python3.12/site-packages', '/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline', '/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax', '/home/aloes/miniconda3/envs/seqneut-pipeline/bin', '/home/aloes/miniconda3/envs/seqneut-pipeline/lib/python3.12', '/home/aloes/miniconda3/envs/seqneut-pipeline/lib/python3.12/lib-dynload', '/home/aloes/miniconda3/envs/seqneut-pipeline/lib/python3.12/site-packages', '/home/aloes/.cache/snakemake/snakemake/source-cache/runtime-cache/tmpzg9btu6h/file/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks', '/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks']); import pickle; snakemake = pickle.loads(b'\x80\x04\x95\x952\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8c\'results/plates/plate13/curvefits.pickle\x94\x8c&results/plates/plate5/curvefits.pickle\x94e}\x94(\x8c\x06_names\x94}\x94\x8c\x07pickles\x94K\x00K\x02\x86\x94s\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94eh\x13\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(h\x19)}\x94\x8c\x05_name\x94h\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\x0fh\x06\x8c\tNamedlist\x94\x93\x94)\x81\x94(h\nh\x0be}\x94(h\r}\x94h\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bubub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8c5results/sera/DRIVE_D10042d30/titers_per_replicate.csv\x94\x8c\'results/sera/DRIVE_D10042d30/titers.csv\x94\x8c\'results/sera/DRIVE_D10042d30/curves.pdf\x94\x8c-results/sera/DRIVE_D10042d30/curvefits.pickle\x94\x8c)results/sera/DRIVE_D10042d30/qc_drops.yml\x94e}\x94(h\r}\x94(\x8c\x0eper_rep_titers\x94K\x00N\x86\x94\x8c\x06titers\x94K\x01N\x86\x94\x8c\ncurves_pdf\x94K\x02N\x86\x94\x8c\x06pickle\x94K\x03N\x86\x94\x8c\x08qc_drops\x94K\x04N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh<h5h>h6h@h7hBh8hDh9ub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(]\x94(\x8c\x14A/California/07/2009\x94\x8c\x12A/Michigan/45/2015\x94\x8c\x12A/Brisbane/02/2018\x94\x8c\x11A/Ghana/2080/2020\x94\x8c\x18A/Cote_DIvoire/1448/2021\x94\x8c\x0fA/Togo/845/2020\x94\x8c\x10A/Togo/0274/2021\x94\x8c\x10A/Ghana/138/2020\x94\x8c\x10A/Hawaii/70/2019\x94\x8c\x12A/Niger/10217/2021\x94\x8c\x19A/SouthAfrica/R16462/2021\x94\x8c\x10A/Nimes/871/2021\x94\x8c\x14A/Belgium/H0038/2022\x94\x8c\x12A/Paris/30353/2021\x94\x8c\x12A/Paris/31196/2021\x94\x8c\x10A/Togo/0304/2021\x94\x8c\x18A/England/220200318/2022\x94\x8c\x14A/Belgium/H0017/2022\x94\x8c\x14A/Washington/23/2020\x94\x8c\x14A/Wisconsin/588/2019\x94\x8c\x1aA/India-PUN-NIV328484/2021\x94\x8c\x13A/Norway/25089/2022\x94\x8c\x19A/SouthAfrica/R14850/2021\x94\x8c\x1aA/India/Pun-NIV312851/2021\x94\x8c\x16A/Bangladesh/8036/2021\x94\x8c\x0eA/Perth/1/2022\x94\x8c\x1cA/Bangladesh/3210810034/2021\x94\x8c\x0eA/Utah/27/2022\x94\x8c\x12A/Michigan/19/2021\x94\x8c\x13A/Chester/5355/2022\x94\x8c\x12A/Newcastle/2/2022\x94\x8c\x1fA/India-Pune-Nivcov2221170/2022\x94\x8c\x10A/Sydney/43/2022\x94\x8c\x12A/Brisbane/48/2022\x94\x8c\x16A/Bangladesh/8002/2021\x94\x8c\x16A/Bangladesh/2221/2021\x94e\x8c\x08midpoint\x94}\x94(\x8c\x0emin_replicates\x94K\x02\x8c\x1bmax_fold_change_from_median\x94K\n\x8c\x11viruses_ignore_qc\x94]\x94ue}\x94(h\r}\x94(\x8c\x17viral_strain_plot_order\x94K\x00N\x86\x94\x8c\x0eserum_titer_as\x94K\x01N\x86\x94\x8c\rqc_thresholds\x94K\x02N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\x80hSh\x82hxh\x84hyub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94(\x8c\x05DRIVE\x94\x8c\tD10042d30\x94e}\x94(h\r}\x94(\x8c\x05group\x94K\x00N\x86\x94\x8c\x05serum\x94K\x01N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94b\x8c\x05group\x94h\x93\x8c\x05serum\x94h\x94ub\x8c\x07threads\x94K\x01\x8c\tresources\x94h\x06\x8c\tResources\x94\x93\x94)\x81\x94(K\x01K\x01\x8c\x15/loc/scratch/58619952\x94e}\x94(h\r}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\xaeK\x01h\xb0K\x01h\xb2h\xabub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c9results/sera/DRIVE_D10042d30/DRIVE_D10042d30_titers.ipynb\x94a}\x94(h\r}\x94\x8c\x08notebook\x94K\x00N\x86\x94sh\x11]\x94(h\x13h\x14eh\x13h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x13sNt\x94bh\x14h\x17h\x19\x85\x94R\x94(h\x19)}\x94h\x1dh\x14sNt\x94bh\xc4h\xc1ub\x8c\x06config\x94}\x94(\x8c\x10seqneut-pipeline\x94\x8c\x10seqneut-pipeline\x94\x8c\x04docs\x94\x8c\x04docs\x94\x8c\x0bdescription\x94X\xfa\x01\x00\x00# Sequencing-based neutralization assays of 2021-2022 DRIVE samples versus H1N1 influenza libraries\nStudy by Loes et al of samples from the DRIVE cohort using sequencing-based neutralization assay developed in the Bloom lab.\n\nSee [Loes et al (2024)](https://doi.org/10.1101/2024.03.08.584176) for the citation for this study.\n\nThe numerical data and computer code are at [https://github.com/jbloomlab/flu_seqneut_DRIVE_2021-22_repeat_vax](https://github.com/jbloomlab/flu_seqneut_DRIVE_2021-22_repeat_vax)\n\x94\x8c\x0fviral_libraries\x94}\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c-data/viral_libraries/pdmH1N1_lib2023_loes.csv\x94s\x8c\x0finitial_pooling\x94\x8c4data/initialpool/2022_pdmH1N1library_initialPool.csv\x94\x8c\x17viral_strain_plot_order\x94\x8c data/viral_strain_plot_order.csv\x94\x8c\x12neut_standard_sets\x94}\x94\x8c\x08loes2023\x94\x8c3data/neut_standard_sets/loes2023_neut_standards.csv\x94s\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\x08upstream\x94\x8c\x1cCCTACAATGTCGGATTTGTATTTAATAG\x94\x8c\ndownstream\x94\x8c\x00\x94\x8c\x04minq\x94K\x14\x8c\x11upstream_mismatch\x94K\x04\x8c\x0ebc_orientation\x94\x8c\x02R2\x94u\x8c#default_process_plate_qc_thresholds\x94}\x94(\x8c\x1bavg_barcode_counts_per_well\x94M\xe8\x03\x8c\x1fmin_neut_standard_frac_per_well\x94G?tz\xe1G\xae\x14{\x8c"no_serum_per_viral_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?@bM\xd2\xf1\xa9\xfc\x8c\x0fmax_fold_change\x94K\x04\x8c\tmax_wells\x94K\x02u\x8c!per_neut_standard_barcode_filters\x94}\x94(\x8c\x08min_frac\x94G?tz\xe1G\xae\x14{\x8c\x0fmax_fold_change\x94K\x04\x8c\tmax_wells\x94K\x02u\x8c min_neut_standard_count_per_well\x94M\xe8\x03\x8c)min_no_serum_count_per_viral_barcode_well\x94M\xf4\x01\x8c+max_frac_infectivity_per_viral_barcode_well\x94K\x05\x8c)min_dilutions_per_barcode_serum_replicate\x94K\x06u\x8c%default_process_plate_curvefit_params\x94}\x94(\x8c\x18frac_infectivity_ceiling\x94K\x01\x8c\x06fixtop\x94]\x94(G?\xe0\x00\x00\x00\x00\x00\x00K\x01e\x8c\tfixbottom\x94K\x00\x8c\x08fixslope\x94]\x94(G?\xe9\x99\x99\x99\x99\x99\x9aK\neu\x8c!default_process_plate_curvefit_qc\x94}\x94(\x8c\x1dmax_frac_infectivity_at_least\x94G?\xe0\x00\x00\x00\x00\x00\x00\x8c\x0fgoodness_of_fit\x94}\x94(\x8c\x06min_R2\x94G?\xe3333333\x8c\x08max_RMSD\x94G?\xb9\x99\x99\x99\x99\x99\x9au\x8c#serum_replicates_ignore_curvefit_qc\x94]\x94\x8c+barcode_serum_replicates_ignore_curvefit_qc\x94]\x94u\x8c\x06plates\x94}\x94(\x8c\x06plate1\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94\x8c\x08datetime\x94\x8c\x04date\x94\x93\x94C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate1_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x06plate2\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate2_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x06plate3\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x02\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate3_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x06plate4\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x02\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate4_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x06plate5\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x04\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate5_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10TCTGTTCCGGCCCGAA\x94\x8c\nD10042d182\x94eas\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x06plate6\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x04\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate6_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94(\x8c\rbarcode_wells\x94]\x94]\x94(\x8c\x10TAATGAGCTTTATGGT\x94\x8c\x02F5\x94ea\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10ACGACATGATCAAACG\x94\x8c\nD10212d182\x94eau\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x06plate7\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x05\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate7_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x06plate8\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x05\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate8_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94(\x8c\rbarcode_wells\x94]\x94(]\x94(\x8c\x10TAGTATAATAGAGCAG\x94\x8c\x02D5\x94e]\x94(\x8c\x10CAGTTCTGCGACCAGC\x94\x8c\x02D9\x94ee\x8c\x18barcode_serum_replicates\x94]\x94(]\x94(\x8c\x10ACGGAATCCCCTGAGA\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10GGATAAGAAAACTACT\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10GTAACATTATACGATT\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10GACTCAATAATCACAC\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10CTATTAATCATGCAAA\x94\x8c\x08D10396d0\x94e]\x94(\x8c\x10TGGAATCGTCACCGAT\x94\x8c\tD10396d30\x94eeu\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x06plate9\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x05\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1edata/plates/plate9_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94]\x94(\x8c\x10CGGATAAAAATGATAT\x94\x8c\tD10417d30\x94eas\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x07plate10\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x06\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate10_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94(]\x94(\x8c\x10CGGATAAAAATGATAT\x94\x8c\tD10041d30\x94e]\x94(\x8c\x10GTTTGACAATCACTAC\x94\x8c\tD10041d30\x94e]\x94(\x8c\x10AGCAGCCTGAAAATAT\x94\x8c\tD10175d30\x94e]\x94(\x8c\x10GACTCAATAATCACAC\x94\x8c\nD10175d182\x94ees\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x07plate11\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\t\x1a\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate11_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\x18barcode_serum_replicates\x94]\x94(]\x94(\x8c\x10ACGGAATCCCCTGAGA\x94\x8c\tD10041d30\x94e]\x94(\x8c\x10GATCCGTACTTTGATT\x94\x8c\x08D10256d0\x94e]\x94(\x8c\x10CATCAACCGCCATTTC\x94\x8c\x08D10256d0\x94ees\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\x07plate13\x94}\x94(\x8c\x05group\x94\x8c\x05DRIVE\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x0c\x01\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\x1fdata/plates/plate13_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00uu\x8c\nplate14lib\x94}\x94(\x8c\x05group\x94\x8c\nValidation\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x07\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c&data/plates/plate14fulllib_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00u\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\tupstream2\x94\x8c\x06GCTACA\x94\x8c\x12upstream2_mismatch\x94K\x01uu\x8c\x0eplate14halflib\x94}\x94(\x8c\x05group\x94\x8c\nValidation\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x07\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c\'data/plates/plate14fhalflib_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00u\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\tupstream2\x94\x8c\x06GCTACA\x94\x8c\x12upstream2_mismatch\x94K\x01uu\x8c\x0cplate14no5a1\x94}\x94(\x8c\x05group\x94\x8c\nValidation\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x07\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c$data/plates/plate14no5a1_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00u\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\tupstream2\x94\x8c\x06GCTACA\x94\x8c\x12upstream2_mismatch\x94K\x01uu\x8c\x0cplate14no5a2\x94}\x94(\x8c\x05group\x94\x8c\nValidation\x94\x8c\x04date\x94j\x1b\x01\x00\x00C\x04\x07\xe7\x08\x07\x94\x85\x94R\x94\x8c\rviral_library\x94\x8c\x14pdmH1N1_lib2023_loes\x94\x8c\x11neut_standard_set\x94\x8c\x08loes2023\x94\x8c\x0bsamples_csv\x94\x8c$data/plates/plate14no5a2_samples.csv\x94\x8c\x0cmanual_drops\x94}\x94\x8c\rqc_thresholds\x94}\x94(h\xefM\xe8\x03h\xf0G?tz\xe1G\xae\x14{h\xf1}\x94(h\xf3G?@bM\xd2\xf1\xa9\xfch\xf4K\x04h\xf5K\x02uh\xf6}\x94(h\xf8G?tz\xe1G\xae\x14{h\xf9K\x04h\xfaK\x02uh\xfbM\xe8\x03h\xfcM\xf4\x01h\xfdK\x05h\xfeK\x06u\x8c\x0fcurvefit_params\x94}\x94(j\x01\x01\x00\x00K\x01j\x02\x01\x00\x00j\x03\x01\x00\x00j\x04\x01\x00\x00K\x00j\x05\x01\x00\x00j\x06\x01\x00\x00u\x8c\x0bcurvefit_qc\x94}\x94(j\t\x01\x00\x00G?\xe0\x00\x00\x00\x00\x00\x00j\n\x01\x00\x00}\x94(j\x0c\x01\x00\x00G?\xe3333333j\r\x01\x00\x00G?\xb9\x99\x99\x99\x99\x99\x9auj\x0e\x01\x00\x00j\x0f\x01\x00\x00j\x10\x01\x00\x00j\x11\x01\x00\x00u\x8c\x1eillumina_barcode_parser_params\x94}\x94(\x8c\tupstream2\x94\x8c\x06GCTACA\x94\x8c\x12upstream2_mismatch\x94K\x01uuu\x8c\x16default_serum_titer_as\x94hx\x8c\x1bdefault_serum_qc_thresholds\x94hy\x8c\x16sera_override_defaults\x94}\x94u\x8c\x04rule\x94\x8c\x12group_serum_titers\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8cs/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax/seqneut-pipeline/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; os.chdir(r'/fh/fast/bloom_j/computational_notebooks/aloes/2024/flu_seqneut_DRIVE_2021-22_repeat_vax');
######## snakemake preamble end #########
Titers for a serum in a group¶
Analyze titers for a serum assigned to a group, aggregating replicates which may be across multiple plates.
import pickle
import sys
import altair as alt
import matplotlib.pyplot as plt
import neutcurve
import numpy
import pandas as pd
import ruamel.yaml as yaml
_ = alt.data_transformers.disable_max_rows()
Get variables from snakemake
:
pickle_fits = snakemake.input.pickles
per_rep_titers_csv = snakemake.output.per_rep_titers
titers_csv = snakemake.output.titers
curves_pdf = snakemake.output.curves_pdf
output_pickle = snakemake.output.pickle
qc_drops_file = snakemake.output.qc_drops
viral_strain_plot_order = snakemake.params.viral_strain_plot_order
serum_titer_as = snakemake.params.serum_titer_as
qc_thresholds = snakemake.params.qc_thresholds
serum = snakemake.wildcards.serum
group = snakemake.wildcards.group
print(f"Processing {group=}, {serum=}")
Processing group='DRIVE', serum='D10042d30'
Get all titers for this plate¶
Combine all the pickled neutcurve.CurveFits
from plates for this serum into a single neutcurve.CurveFits
:
print(f"Combining the curve fits for {group=}, {serum=} from {pickle_fits=}")
fits_to_combine = []
for fname in pickle_fits:
with open(fname, "rb") as f:
fits_to_combine.append(pickle.load(f))
fits_noqc = neutcurve.CurveFits.combineCurveFits(fits_to_combine, sera=[serum])
Combining the curve fits for group='DRIVE', serum='D10042d30' from pickle_fits=['results/plates/plate13/curvefits.pickle', 'results/plates/plate5/curvefits.pickle']
Indicate how we are calculating the titer:
print(f"Calculating with {serum_titer_as=}")
assert serum_titer_as in {"nt50", "midpoint"}
Calculating with serum_titer_as='midpoint'
Get all the per-replicate fit params with the titers. We also convert the IC50 to NT50, and take inverse of midpoint to get it on same scale as NT50s:
per_rep_titers = fits_noqc.fitParams(average_only=False, no_average=True).assign(
group=group,
nt50=lambda x: 1 / x["ic50"],
midpoint=lambda x: 1 / x["midpoint_bound"],
titer=lambda x: x["midpoint"] if serum_titer_as == "midpoint" else x["nt50"],
titer_bound=lambda x: (
x["midpoint_bound_type"] if serum_titer_as == "midpoint" else x["ic50_bound"]
).map({"lower": "upper", "upper": "lower", "interpolated": "interpolated"}),
titer_as=serum_titer_as,
)[
[
"group",
"serum",
"virus",
"replicate",
"titer",
"titer_bound",
"titer_as",
"nt50",
"midpoint",
"top",
"bottom",
"slope",
]
]
assert per_rep_titers.notnull().all().all()
if len(invalid_titer_as := per_rep_titers.query("(titer_as == 'nt50') and top <= 0.5")):
raise ValueError(
f"There are titers computed as nt50 when curve top <= 0.5:\n{invalid_titer_as}"
)
assert len(per_rep_titers) == per_rep_titers["replicate"].nunique()
# get viruses in the order to plot them
viruses = sorted(per_rep_titers["virus"].unique())
if viral_strain_plot_order is not None:
if not set(viruses).issubset(viral_strain_plot_order):
raise ValueError(
"`viral_strain_plot_order` lacks some viruses with titers:\n"
+ str(set(viruses) - set(viral_strain_plot_order))
)
viruses = [v for v in viral_strain_plot_order if v in viruses]
print(f"{serum=} has titers for a total of {len(viruses)} viruses")
serum='D10042d30' has titers for a total of 36 viruses
Correlate NT50s with midpoints of curves¶
Plot the correlation of the NT50s with the midpoint (this is an interactive plot, mouse over points for details).
This plot can help you determine if you made the correct choice of serum_titer_as
when choosing to use the midpoint or NT50 for the titer.
For titers where they are well correlated it should not matter which you chose.
But if there are titers far from the correlation line, you should look at those measurements and curves to make sure you made the correct choice of calculating the titer as the NT50 versus midpoint:
virus_selection = alt.selection_point(fields=["virus"], on="mouseover", empty=False)
midpoint_vs_nt50_chart = (
alt.Chart(per_rep_titers)
.add_params(virus_selection)
.encode(
alt.X("nt50", scale=alt.Scale(type="log", nice=False, padding=8)),
alt.Y("midpoint", scale=alt.Scale(type="log", nice=False, padding=8)),
alt.Color("titer_bound"),
strokeWidth=alt.condition(virus_selection, alt.value(3), alt.value(0)),
size=alt.condition(virus_selection, alt.value(100), alt.value(60)),
tooltip=[
alt.Tooltip(c, format=".2g") if per_rep_titers[c].dtype == float else c
for c in per_rep_titers.columns
if c not in {"group", "serum", "titer_as"}
],
)
.mark_circle(stroke="black", fillOpacity=0.45, color="black")
.properties(
width=350,
height=350,
title=f"NT50 versus midpoint for {group} {serum}",
)
.configure_axis(grid=False)
)
midpoint_vs_nt50_chart
Write the individual per-replicate titers to a file, this is before any QC has been applied:
print(f"Writing per-replicate titers (without QC filtering) to {per_rep_titers_csv=}")
per_rep_titers.to_csv(per_rep_titers_csv, index=False, float_format="%.4g")
Writing per-replicate titers (without QC filtering) to per_rep_titers_csv='results/sera/DRIVE_D10042d30/titers_per_replicate.csv'
Plot median titers and determine if they pass QC¶
Get the median titers for each virus across replicates, then add these median titers to the per-replicate titers and calculate the fold-change in titer between each replicate and its median. Finally, for each virus indicates whether it passes the QC:
print(f"Using the following {qc_thresholds=}")
def get_median_bound(s):
"""Get the bound on titer when taking median."""
s = list(s)
if len(s) % 2:
return s[len(s) // 2]
else:
bounds = s[len(s) // 2 - 1 : len(s) // 2 + 1]
assert len(bounds) == 2
if len(set(bounds)) == 1:
return bounds[0]
elif "interpolated" in bounds:
return [b for b in bounds if b != "interpolated"][0]
else:
return "inconsistent"
median_titers_noqc = (
per_rep_titers.sort_values("titer") # for getting median bound
.groupby(["group", "serum", "virus", "titer_as"], as_index=False)
.aggregate(
titer=pd.NamedAgg("titer", "median"),
n_replicates=pd.NamedAgg("replicate", "count"),
titer_sem=pd.NamedAgg("titer", "sem"),
titer_bound=pd.NamedAgg("titer_bound", get_median_bound),
)
)
per_rep_titers_w_fc = (
per_rep_titers.merge(
median_titers_noqc[["group", "serum", "virus", "titer"]].rename(
columns={"titer": "median_titer"}
),
validate="many_to_one",
on=["group", "serum", "virus"],
)
.assign(
fc_from_median=lambda x: numpy.where(
x["titer"] > x["median_titer"],
x["titer"] / x["median_titer"],
x["median_titer"] / x["titer"],
),
)
.drop(columns=["group", "serum", "titer_as", "median_titer"])
)
median_titers_noqc = median_titers_noqc.merge(
per_rep_titers_w_fc.groupby("virus", as_index=False).aggregate(
max_fc_from_median=pd.NamedAgg("fc_from_median", "max")
),
on="virus",
validate="one_to_one",
).assign(
fails_min_reps=lambda x: x["n_replicates"] < qc_thresholds["min_replicates"],
fails_max_fc=lambda x: (
x["max_fc_from_median"] >= qc_thresholds["max_fold_change_from_median"]
),
fails_qc=lambda x: x["fails_min_reps"] | x["fails_max_fc"],
fails_qc_reason=lambda x: (
x.apply(
lambda r: ", ".join(
(["min_replicates"] if r["fails_min_reps"] else [])
+ (["max_fold_change_from_median"] if r["fails_max_fc"] else [])
),
axis=1,
)
),
)
# get viruses failing QC in order to plot
viruses_failing_qc = (
median_titers_noqc.query("fails_qc").set_index("virus")["fails_qc_reason"].to_dict()
)
viruses_failing_qc = {
v: viruses_failing_qc[v] for v in viruses if v in viruses_failing_qc
}
median_titers_noqc = median_titers_noqc.drop(
columns=["fails_min_reps", "fails_max_fc", "fails_qc_reason"]
)
per_rep_titers_w_fc = per_rep_titers_w_fc.merge(
median_titers_noqc[["virus", "fails_qc"]],
on="virus",
validate="many_to_one",
)
Using the following qc_thresholds={'min_replicates': 2, 'max_fold_change_from_median': 10, 'viruses_ignore_qc': []}
Now plot the per-replicate and median titers, indicating any viruses that failed QC.
Note that potentially some of these titers may still be retained if the viruses in question are specified in viruses_ignore_qc
of qc_thresholds
.
virus_selection = alt.selection_point(fields=["virus"], on="mouseover", empty=False)
per_rep_chart = (
alt.Chart(per_rep_titers_w_fc)
.encode(
alt.X("titer", scale=alt.Scale(nice=False, padding=5, type="log")),
alt.Y("virus", sort=viruses),
alt.Fill(
"fails_qc",
title=f"fails {qc_thresholds['min_replicates']=}, {qc_thresholds['max_fold_change_from_median']=}",
legend=alt.Legend(titleLimit=500),
),
alt.Shape("titer_bound"),
strokeWidth=alt.condition(virus_selection, alt.value(2), alt.value(0)),
tooltip=[
alt.Tooltip(c, format=".3g") if per_rep_titers_w_fc[c].dtype == float else c
for c in per_rep_titers_w_fc
],
)
.mark_point(
size=35,
filled=True,
fillOpacity=0.5,
strokeOpacity=1,
stroke="black",
)
)
median_chart = (
alt.Chart(median_titers_noqc)
.encode(
alt.X("titer", scale=alt.Scale(nice=False, padding=5, type="log")),
alt.Y("virus", sort=viruses),
alt.Fill("fails_qc"),
alt.Shape("titer_bound"),
strokeWidth=alt.condition(virus_selection, alt.value(2), alt.value(0.5)),
tooltip=[
alt.Tooltip(c, format=".3g") if median_titers_noqc[c].dtype == float else c
for c in median_titers_noqc
],
)
.mark_point(
size=75,
filled=True,
fillOpacity=0.9,
strokeOpacity=1,
stroke="black",
)
)
titer_chart = (
(per_rep_chart + median_chart)
.add_params(virus_selection)
.properties(
height=alt.Step(11),
width=250,
title=f"{group} {serum} median (large points) and per-replicate (small points) titers",
)
.configure_axis(grid=False)
)
titer_chart
Plot individual curves for any viruses failing QC¶
Plot individual curves for viruses failing QC.
Note that potentially some of these titers may still be retained if the viruses in question are specified in viruses_ignore_qc
of qc_thresholds
.
print(f"Neutralization curves for the {len(viruses_failing_qc)} viruses failing QC:")
if len(viruses_failing_qc):
fig, _ = fits_noqc.plotReplicates(
viruses=viruses_failing_qc,
attempt_shared_legend=False,
legendfontsize=8,
ncol=4,
heightscale=1.2,
widthscale=1.2,
subplot_titles="{virus}",
draw_in_bounds=True,
)
_ = fig.suptitle(
f"neutralization curves for viruses failing QC for {group} {serum}",
y=1,
fontsize=18,
fontweight="bold",
)
fig.tight_layout()
Neutralization curves for the 0 viruses failing QC:
Get the viruses to drop for QC failures¶
Drop any viruses that fail QC and are not specified in viruses_ignore_qc
of qc_thresholds
:
viruses_to_drop = {
v: reason
for (v, reason) in viruses_failing_qc.items()
if v not in qc_thresholds["viruses_ignore_qc"]
}
print(f"Dropping {len(viruses_to_drop)} viruses for failing QC:")
yaml.YAML(typ="rt").dump(viruses_to_drop, sys.stdout)
if nkept := (len(viruses_failing_qc) - len(viruses_to_drop)):
print(
f"\nRetaining {nkept} viruses that fail QC because they are in `viruses_ignore_qc`:"
)
print(
{
v: reason
for (v, reason) in viruses_failing_qc.items()
if v in qc_thresholds["viruses_ignore_qc"]
}
)
print(f"\nWriting QC drops to {qc_drops_file}")
with open(qc_drops_file, "w") as f:
yaml.YAML(typ="rt").dump(viruses_to_drop, f)
Dropping 0 viruses for failing QC: {}
Writing QC drops to results/sera/DRIVE_D10042d30/qc_drops.yml
Get and plot the neutralization curves for all retained viruses¶
First, get the CurveFits
for just those retained viruses (dropping ones that fail QC), and plot:
fits_qc = neutcurve.CurveFits.combineCurveFits(
[fits_noqc],
viruses=[v for v in viruses if v not in viruses_to_drop],
)
assert len(viruses) == len(fits_qc.viruses[serum]) + len(viruses_to_drop)
fig, _ = fits_qc.plotReplicates(
attempt_shared_legend=False,
legendfontsize=8,
ncol=4,
heightscale=1.2,
widthscale=1.2,
subplot_titles="{virus}",
viruses=[v for v in viruses if v not in viruses_to_drop],
draw_in_bounds=True,
)
_ = fig.suptitle(
f"neutralization curves for retained viruses for {group} {serum}",
y=1,
fontsize=18,
fontweight="bold",
)
fig.tight_layout()
display(fig)
print(f"Saving to plot of curves to {curves_pdf}")
fig.savefig(curves_pdf)
plt.close(fig)
Saving to plot of curves to results/sera/DRIVE_D10042d30/curves.pdf
Save the CurveFits
to a pickle file:
with open(output_pickle, "wb") as f:
pickle.dump(fits_qc, f)
Write the titers (excluding QC dropped viruses) to a CSV:
print(f"Writing titers to {titers_csv}")
(
median_titers_noqc.query("virus not in @viruses_to_drop")[
[
"group",
"serum",
"virus",
"titer",
"titer_bound",
"titer_sem",
"n_replicates",
"titer_as",
]
].to_csv(titers_csv, index=False, float_format="%.4g")
)
Writing titers to results/sera/DRIVE_D10042d30/titers.csv