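Hi. I cannot guarantee it, but judging from my old code, it seems that the tracking ids only need to be unique within a single scene (predictions are looked up per scene, and track scores are grouped by tracking id separately for each scene):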
scene_tracks_pred = self.tracks_pred[scene_id]
# Visualize the boxes in this frame.
if self.class_name in self.render_classes and threshold is None:
save_path = os.path.join(self.output_dir, 'render', str(scene_id), self.class_name)
os.makedirs(save_path, exist_ok=True)
renderer = TrackingRenderer(save_path)
else:
renderer = None
for timestamp in scene_tracks_gt.keys():
# Select only the current class.
frame_gt = scene_tracks_gt[timestamp]
frame_pred = scene_tracks_pred[timestamp]
frame_gt = [f for f in frame_gt if f.tracking_name == self.class_name]
frame_pred = [f for f in frame_pred if f.tracking_name == self.class_name]
# Threshold boxes by score. Note that the scores were previously averaged over the whole track.
if threshold is not None:
frame_pred = [f for f in frame_pred if f.tracking_score >= threshold]
# Group annotations wrt scene and timestamp.
for sample_token in all_boxes.sample_tokens:
sample_record = nusc.get('sample', sample_token)
scene_token = sample_record['scene_token']
tracks[scene_token][sample_record['timestamp']] = all_boxes.boxes[sample_token]
# Replace box scores with track score (average box score). This only affects the compute_thresholds method and
# should be done before interpolation to avoid diluting the original scores with interpolated boxes.
if not gt:
for scene_id, scene_tracks in tracks.items():
# For each track_id, collect the scores.
track_id_scores = defaultdict(list)
for timestamp, boxes in scene_tracks.items():
for box in boxes:
track_id_scores[box.tracking_id].append(box.tracking_score)
# Compute average scores for each track.
track_id_avg_scores = {}
for tracking_id, scores in track_id_scores.items():
track_id_avg_scores[tracking_id] = np.mean(scores)