I'm experiencing a bug with the VOT toolkit. The tracking recall returned by the "longterm_ar" metric is > 1 for sequences with occlusions. I'm using an oracle tracker: for every frame it simply returns the ground-truth region. The tracker script (tracker/tracker.py) and the steps to reproduce are below.
# copy pasted from https://github.com/votchallenge/integration/blob/4aa5de6e3d87026e3ef516fa3ee02ee688d741ee/python/vot.py
import os
import collections

import numpy as np

try:
    import trax
except ImportError:
    raise Exception("TraX support not found. Please add trax module to Python path.")

if trax._ctypes.trax_version().decode("ascii") < "4.0.0":
    raise ImportError("TraX version 4.0.0 or newer is required.")
Rectangle = collections.namedtuple("Rectangle", ["x", "y", "width", "height"])
Point = collections.namedtuple("Point", ["x", "y"])
Polygon = collections.namedtuple("Polygon", ["points"])
Empty = collections.namedtuple("Empty", [])
class VOT(object):
    """Base class for VOT toolkit integration in Python.

    This class is only a wrapper around the TraX protocol and can be used for
    single or multi-object tracking. The wrapper assumes that the experiment
    will provide new objects only at the first frame and will fail otherwise.
    """

    def __init__(self, region_format, channels=None, multiobject: bool = None):
        """Constructor for the VOT wrapper.

        Args:
            region_format: Region format options
            channels: Channels that are supported by the tracker
            multiobject: Whether to use multi-object tracking
        """
        assert region_format in [
            trax.Region.RECTANGLE,
            trax.Region.POLYGON,
            trax.Region.MASK,
        ]

        if multiobject is None:
            multiobject = os.environ.get("VOT_MULTI_OBJECT", "0") == "1"

        if channels is None:
            channels = ["color"]
        elif channels == "rgbd":
            channels = ["color", "depth"]
        elif channels == "rgbt":
            channels = ["color", "ir"]
        elif channels == "ir":
            channels = ["ir"]
        else:
            raise Exception("Illegal configuration {}.".format(channels))

        self._trax = trax.Server(
            [region_format],
            [trax.Image.PATH],
            channels,
            metadata=dict(vot="python"),
            multiobject=multiobject,
        )

        request = self._trax.wait()
        assert request.type == "initialize"

        self._objects = []

        assert len(request.objects) > 0 and (multiobject or len(request.objects) == 1)

        for object, _ in request.objects:
            if isinstance(object, trax.Polygon):
                self._objects.append(Polygon([Point(x[0], x[1]) for x in object]))
            elif isinstance(object, trax.Mask):
                self._objects.append(object.array(True))
            else:
                self._objects.append(Rectangle(*object.bounds()))

        self._image = [x.path() for k, x in request.image.items()]
        if len(self._image) == 1:
            self._image = self._image[0]

        self._multiobject = multiobject

        self._trax.status(request.objects)
    def region(self):
        """Returns initialization region for the first frame in single object tracking mode.

        Returns:
            initialization region
        """
        assert not self._multiobject
        return self._objects[0]

    def objects(self):
        """Returns initialization regions for the first frame in multi object tracking mode.

        Returns:
            initialization regions for all objects
        """
        return self._objects

    def report(self, status, confidence=None):
        """Report the tracking results to the client.

        Arguments:
            status: region for the frame or a list of regions in case of multi object tracking
            confidence: confidence for the object detection, used only in single object tracking mode
        """

        def convert(region):
            """Convert region to TraX format"""
            # If region is None, return empty region
            if region is None:
                return trax.Rectangle.create(0, 0, 0, 0)
            assert isinstance(region, (Empty, Rectangle, Polygon, np.ndarray))
            if isinstance(region, Empty):
                return trax.Rectangle.create(0, 0, 0, 0)
            elif isinstance(region, Polygon):
                return trax.Polygon.create([(x.x, x.y) for x in region.points])
            elif isinstance(region, np.ndarray):
                return trax.Mask.create(region)
            else:
                return trax.Rectangle.create(
                    region.x, region.y, region.width, region.height
                )

        if not self._multiobject:
            properties = {}
            if confidence is not None:
                properties["confidence"] = confidence
            status = [(convert(status), properties)]
        else:
            assert isinstance(status, (list, tuple))
            status = [(convert(x), {}) for x in status]

        self._trax.status(status, {})
    def frame(self):
        """Get a frame (image path) from client.

        Returns:
            absolute path of the image
        """
        if hasattr(self, "_image"):
            image = self._image
            del self._image
            return image

        request = self._trax.wait()

        # Only the first frame can declare new objects for now
        assert request.objects is None or len(request.objects) == 0

        if request.type == "frame":
            image = [x.path() for k, x in request.image.items()]
            if len(image) == 1:
                return image[0]
            return image
        else:
            return None

    def quit(self):
        """Quit the tracker"""
        if hasattr(self, "_trax"):
            self._trax.quit()

    def __del__(self):
        """Destructor for the tracker, calls quit."""
        self.quit()
# MY CODE
class OracleTracker:
    def __init__(self, root):
        with open(root, "r") as f:
            lines = f.readlines()

        def parse_line(line):
            if line.strip() == "0":
                return Empty()
            else:
                x, y, width, height = [float(x) for x in line.strip().split(",")]
                return Rectangle(x, y, width, height)
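        # Assumed groundtruth.txt format (inferred from parse_line, not stated in the
        # original report): one line per frame, either "x,y,width,height" as
        # comma-separated floats, or a single "0" on frames without a ground-truth
        # annotation, which is mapped to Empty().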
        self.groundtruth = [parse_line(line) for line in lines]
        self.i = 1  # skip initialization frame

    def track(self, imagefile):
        box = self.groundtruth[self.i]
        score = 1.0
        self.i += 1
        return box, score


def main():
    tracker = OracleTracker(
        os.path.join(os.path.dirname(__file__), "..", "sequences", "airplane", "groundtruth.txt")
    )

    # *****************************************
    # VOT: Create VOT handle at the beginning
    # Then get the initialization region
    # and the first image
    # *****************************************
    handle = VOT("rectangle")

    # Process the first frame
    imagefile = handle.frame()
    if not imagefile:
        exit(0)

    selection = handle.region()

    while True:
        # *****************************************
        # VOT: Call frame method to get path of the
        # current image frame. If the result is
        # null, the sequence is over.
        # *****************************************
        imagefile = handle.frame()
        if not imagefile:
            break

        selection, score = tracker.track(imagefile)

        # *****************************************
        # VOT: Report the position of the object
        # every frame using report method.
        # *****************************************
        handle.report(selection, score)


if __name__ == "__main__":
    main()
To reproduce
mkdir repro to create a workspace folder
config.yaml
stack.yaml
trackers.ini (a possible layout is sketched after these steps)
tracker/tracker.py (the script shown above)
run the evaluation with vot evaluate oracle
get the report with vot report oracle
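The contents of config.yaml, stack.yaml and trackers.ini are not shown above. As a rough sketch only (the section name oracle matches the commands above, while the command and paths values are assumptions rather than the reporter's actual file), a trackers.ini entry for a Python tracker using the traxpython protocol usually looks something like this, with config.yaml pointing the workspace at trackers.ini and at the custom stack:

[oracle]
label = oracle
protocol = traxpython
# module name of the tracker script (tracker.py imported as a module)
command = tracker
# directory that contains tracker.py so the module can be found
paths = <workspace>/tracker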
Possible solution
After some debugging I found out that here,
toolkit/vot/analysis/longterm.py, lines 65 to 84 (at c26c38b),
the recall seems to take into account every frame, and not just the frames with a ground-truth annotation, as stated in Eq. (2) of "Performance evaluation methodology for long-term single-object tracking", Lukezic et al., 2021.
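For reference, the cited definition computes recall only over frames that carry a ground-truth annotation. Up to notation (this is a reconstruction, not a verbatim copy of Eq. (2)), it reads

Re(\tau_\theta) = \frac{1}{N_g} \sum_{t \in \{t \,:\, G_t \neq \emptyset\}} \Omega\left(A_t(\tau_\theta), G_t\right),

where G_t is the ground-truth region in frame t, A_t(\tau_\theta) is the prediction kept at confidence threshold \tau_\theta, \Omega is the region overlap, and N_g is the number of frames with a non-empty ground truth. If the sum instead runs over every frame while the denominator stays N_g, the extra terms contributed by unannotated (occluded) frames push the ratio above 1 for an oracle tracker, which is consistent with the behaviour reported above.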
What the code should do, in my opinion, is filter the values for frames without a ground-truth annotation out of the overlaps and confidence arrays, i.e. transform this line:
toolkit/vot/analysis/longterm.py, line 82 (at c26c38b)
With this fix I was able to get the expected recall values (no longer greater than 1).
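As a rough illustration of the masking described above (a sketch with hypothetical variable and function names, not the actual code in longterm.py and not the reporter's patch), the recall computation restricted to annotated frames could look like this:

import numpy as np

def recall_on_annotated_frames(overlaps, confidence, has_groundtruth, threshold):
    """Average overlap over annotated frames only, at a given confidence threshold.

    overlaps:        per-frame overlap between the prediction and the ground truth
    confidence:      per-frame prediction confidence
    has_groundtruth: boolean mask, True where the frame has a ground-truth annotation
    threshold:       confidence threshold (tau_theta)
    """
    overlaps = np.asarray(overlaps, dtype=float)
    confidence = np.asarray(confidence, dtype=float)
    has_groundtruth = np.asarray(has_groundtruth, dtype=bool)

    # Drop frames without ground truth before thresholding, as in Eq. (2).
    overlaps = overlaps[has_groundtruth]
    confidence = confidence[has_groundtruth]

    n_g = has_groundtruth.sum()            # number of annotated frames
    reported = confidence >= threshold     # frames where the tracker reports the target
    return overlaps[reported].sum() / n_g if n_g > 0 else 0.0

With this restriction an oracle tracker that reports the ground truth with confidence above the threshold scores exactly 1.0, so any value above 1 indicates that unannotated frames leaked into the sum.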
Related issues
Might be related to #63
Thank you for the comprehensive report and the pull request; I will review it and merge it if I do not see any problems. But it will happen in July when I have some time.