Construct Friend Tree#

Basic Usage#

To construct a friend tree from scratch, you can follow the example below.

import json

from heptools.root import Chunk, Friend
from heptools.utils.json import DefaultEncoder

# initialize a friend tree
friend = Friend("test_friend")
# fetch the metadata of the target tree
target_tree_path="root://host.server//path/to/rootfile.root"
target_tree_name="Events"
target = Chunk(source=target_tree_path, name=target_tree_name, fetch=True)
# construct new branches
branches = ... # some arrays with the same length as the target tree
# attach to the target tree
friend.add(target, branches)
# dump to root file
friend.dump() # will write to "root://host.server//path/to/test_friend_{target.uuid}_0_{target.entry_stop}.root" by default
# save the friend tree to a json file
with open("friend.json", "w") as f:
    json.dump(friend, f, cls=DefaultEncoder)

Then, the data can be retrieved by calling arrays().

# load the friend tree from the json file
with open("friend.json", "r") as f:
    friend = Friend.from_json(json.load(f))
# fetch the metadata of the target tree
target = Chunk(source=target_tree_path, name=target_tree_name, fetch=True)
# retrieve branches
branches = friend.arrays(target)

You can also attach an existing tree as a friend.

# fetch the metadata of the existing tree
friend_tree_path="root://host.server//path/to/friend.root"
friend_tree_name="FriendEvents"
branches = Chunk(path=friend_tree_path, name=friend_tree_name, fetch=True)
# attach to the friend tree
friend.add(target, branches)
# friend.dump() # nothing to dump, so this line can be omitted

With coffea<=0.7.22#

The friend tree can be used inside coffea processors. For example,

import json

from heptools.root import Chunk, Friend
from heptools.utils.json import DefaultEncoder

class FriendTreeMaker(processor.ProcessorABC):
    def process(self, events):
        # initialize a friend tree
        friend = Friend("test_coffea_friend")
        # fetch the metadata of the target chunk
        target = Chunk.from_coffea_events(events)
        # construct new branches
        branches = ... # some arrays with the same length as events
        # attach to the target tree
        friend.add(target, branches)
        # dump to root file
        friend.dump()
        return friend # friend object can be accumulated

# run the processor
friend = ... # run the processor in a way you like
# (optional) merge the friend chunks to improve reading performance
merged_friend = friend.merge(step=100_000)
# save the friend tree to a json file
with open("friend.json", "w") as f:
    json.dump(merged_friend, f, cls=DefaultEncoder)

For a large dataset, you can use dask to merge.

import dask

merged_friend, = dask.compute(friend.merge(step=100_000, dask=True))

Then, the data can be retrieved in other processors. For example,

from heptools.root import Friend
from heptools.utils.json import DefaultEncoder

class OtherProcessor(processor.ProcessorABC):
    def __init__(self, friend_path):
        with open(friend_path, "r") as f:
            self.friend = Friend.from_json(json.load(f))

    def process(self, events):
        # fetch the metadata of the target chunk
        target = Chunk.from_coffea_events(events)
        # retrieve branches
        branches = self.friend.arrays(target)

With coffea>=2023.12.0#

# TODO construct: match partition, convert to array, convert back and add up.

# TODO retrieve: match partition, read from friend