diff --git a/src/coffea/nanoevents/schemas/nanoaod.py b/src/coffea/nanoevents/schemas/nanoaod.py index 3f7b480af..5fc7cbccf 100644 --- a/src/coffea/nanoevents/schemas/nanoaod.py +++ b/src/coffea/nanoevents/schemas/nanoaod.py @@ -64,6 +64,7 @@ class NanoAODSchema(BaseSchema): "IsoTrack": "PtEtaPhiMCollection", "SoftActivityJet": "PtEtaPhiMCollection", "TrigObj": "PtEtaPhiMCollection", + "CorrT1METJet": "PtEtaPhiMCollection", # True lorentz: pt, eta, phi, mass "FatJet": "FatJet", "GenDressedLepton": "PtEtaPhiMCollection", @@ -314,6 +315,22 @@ def _build_collections(self, field_names, input_contents): if "Photon_energy" in branch_forms: branch_forms["Photon_regrEnergy"] = branch_forms.pop("Photon_energy") + # Alias CorrT1METJet_rawPt to CorrT1METJet_pt and add zero mass for CorrT1METJet collection + if "oCorrT1METJet" in branch_forms: + if "CorrT1METJet_pt" not in branch_forms: + branch_forms["CorrT1METJet_pt"] = branch_forms["CorrT1METJet_rawPt"] + if "CorrT1METJet_mass" not in branch_forms: + branch_forms["CorrT1METJet_mass"] = transforms.zeros_from_offsets_form( + branch_forms["oCorrT1METJet"] + ) + + # Add zero mass to the trigger objects + if "oTrigObj" in branch_forms: + if "TrigObj_mass" not in branch_forms: + branch_forms["TrigObj_mass"] = transforms.zeros_from_offsets_form( + branch_forms["oTrigObj"] + ) + output = {} for name in collections: mixin = self.mixins.get(name, "NanoCollection") diff --git a/src/coffea/nanoevents/trace.py b/src/coffea/nanoevents/trace.py index e3d97c284..f44f44c34 100644 --- a/src/coffea/nanoevents/trace.py +++ b/src/coffea/nanoevents/trace.py @@ -5,6 +5,8 @@ from functools import partial import awkward as ak +from awkward._nplikes.shape import unknown_length +from awkward.forms.form import regularize_buffer_key from coffea.nanoevents.util import unquote @@ -38,28 +40,164 @@ def _make_length_zero_one_tracer( buffer_key = events.attrs["@buffer_key"] buffer_keys = form.expected_from_buffers(buffer_key=buffer_key).keys() - if length == 0: - buffers = ak.to_buffers( - form.length_zero_array(), - byteorder=ak._util.native_byteorder, - )[2].values() - elif length == 1: - buffers = ak.to_buffers( - form.length_one_array(), - byteorder=ak._util.native_byteorder, - )[2].values() - else: - raise ValueError("length must be 0 or 1") - report = [] def generate(buffer, report, buffer_key): report.append(buffer_key) return buffer - container = {} - for key, buffer in zip(buffer_keys, buffers): - container[key] = partial(generate, buffer=buffer, report=report, buffer_key=key) + if length == 0: + buffers = {key: b"\x00\x00\x00\x00\x00\x00\x00\x00" for key in buffer_keys} + + container = {} + for key, buffer in buffers.items(): + container[key] = partial( + generate, buffer=buffer, report=report, buffer_key=key + ) + + elif length == 1: + buffers = {} + getkey = regularize_buffer_key(buffer_key) + + def prepare_empty(form): + form_key = form.form_key + + if isinstance(form, (ak.forms.BitMaskedForm, ak.forms.ByteMaskedForm)): + buffers[getkey(form, "mask")] = b"" + return form.copy(content=prepare_empty(form.content), form_key=form_key) + + elif isinstance(form, ak.forms.IndexedOptionForm): + buffers[getkey(form, "index")] = b"" + return form.copy(content=prepare_empty(form.content), form_key=form_key) + + elif isinstance(form, ak.forms.EmptyForm): + return form + + elif isinstance(form, ak.forms.UnmaskedForm): + return form.copy(content=prepare_empty(form.content)) + + elif isinstance(form, (ak.forms.IndexedForm, ak.forms.ListForm)): + buffers[getkey(form, "index")] = b"" + return form.copy(content=prepare_empty(form.content), form_key=form_key) + + elif isinstance(form, ak.forms.ListOffsetForm): + buffers[getkey(form, "offsets")] = b"" + return form.copy(content=prepare_empty(form.content), form_key=form_key) + + elif isinstance(form, ak.forms.RegularForm): + return form.copy(content=prepare_empty(form.content)) + + elif isinstance(form, ak.forms.NumpyForm): + buffers[getkey(form, "data")] = b"" + return form.copy(form_key=form_key) + + elif isinstance(form, ak.forms.RecordForm): + return form.copy(contents=[prepare_empty(x) for x in form.contents]) + + elif isinstance(form, ak.forms.UnionForm): + # both tags and index will get this buffer + buffers[getkey(form, "tags")] = b"" + buffers[getkey(form, "index")] = b"" + return form.copy( + contents=[prepare_empty(x) for x in form.contents], + form_key=form_key, + ) + + else: + raise AssertionError(f"not a Form: {form!r}") + + def prepare(form, multiplier): + form_key = form.form_key + + if isinstance(form, (ak.forms.BitMaskedForm, ak.forms.ByteMaskedForm)): + if form.valid_when: + buffers[getkey(form, "mask")] = b"\x00" * multiplier + else: + buffers[getkey(form, "mask")] = b"\xff" * multiplier + # switch from recursing down `prepare` to `prepare_empty` + return form.copy(content=prepare_empty(form.content), form_key=form_key) + + elif isinstance(form, ak.forms.IndexedOptionForm): + buffers[getkey(form, "index")] = ( + b"\xff\xff\xff\xff\xff\xff\xff\xff" # -1 + ) + # switch from recursing down `prepare` to `prepare_empty` + return form.copy(content=prepare_empty(form.content), form_key=form_key) + + elif isinstance(form, ak.forms.EmptyForm): + # no error if protected by non-recursing node type + raise TypeError( + "cannot generate a length_one_array from a Form with an " + "unknowntype that cannot be hidden (EmptyForm not within " + "BitMaskedForm, ByteMaskedForm, or IndexedOptionForm)" + ) + + elif isinstance(form, ak.forms.UnmaskedForm): + return form.copy(content=prepare(form.content, multiplier)) + + elif isinstance(form, (ak.forms.IndexedForm, ak.forms.ListForm)): + buffers[getkey(form, "index")] = b"\x00" * (8 * multiplier) + buffers[getkey(form, "starts")] = b"\x00" * (8 * multiplier) + buffers[getkey(form, "stops")] = b"\x00" * (8 * multiplier) + return form.copy( + content=prepare(form.content, multiplier), form_key=form_key + ) + + elif isinstance(form, ak.forms.ListOffsetForm): + # offsets length == array length + 1 + buffers[getkey(form, "offsets")] = b"\x00" * (8 * (multiplier + 1)) + return form.copy( + content=prepare(form.content, multiplier), form_key=form_key + ) + + elif isinstance(form, ak.forms.RegularForm): + size = form.size + + # https://github.com/scikit-hep/awkward/pull/2499#discussion_r1220503454 + if size is unknown_length: + size = 1 + + return form.copy(content=prepare(form.content, multiplier * size)) + + elif isinstance(form, ak.forms.NumpyForm): + dtype = ak.types.numpytype.primitive_to_dtype(form._primitive) + size = multiplier * dtype.itemsize + for x in form.inner_shape: + if x is not unknown_length: + size *= x + + buffers[getkey(form, "data")] = b"\x00" * size + return form.copy(form_key=form_key) + + elif isinstance(form, ak.forms.RecordForm): + return form.copy( + # recurse down all contents + contents=[prepare(x, multiplier) for x in form.contents] + ) + + elif isinstance(form, ak.forms.UnionForm): + # both tags and index will get this buffer, but index is 8 bytes + buffers[getkey(form, "tags")] = b"\x00" * (8 * multiplier) + buffers[getkey(form, "index")] = b"\x00" * (8 * multiplier) + # recurse down contents[0] with `prepare`, but others with `prepare_empty` + contents = [prepare(form.contents[0], multiplier)] + for x in form.contents[1:]: + contents.append(prepare_empty(x)) + return form.copy(contents=contents, form_key=form_key) + + else: + raise AssertionError(f"not a Form: {form!r}") + + form = prepare(form, 1) + container = {} + for key, buffer in buffers.items(): + container[key] = partial( + generate, buffer=buffer, report=report, buffer_key=key + ) + + else: + raise ValueError("length must be 0 or 1") + array = ak.from_buffers( form=form, length=length,