-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathindex.html
794 lines (686 loc) · 39.6 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Universal Actions for Enhanced Embodied Foundation Models</title>
  <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700&display=swap"
    rel="stylesheet" />
  <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" rel="stylesheet" />
  <link href="https://ai-public.creatie.ai/gen_page/tailwind-custom.css" rel="stylesheet" />
  <!--
    Tailwind Play CDN with its first-party plugins.
    The previous src had every "name@version" segment replaced by an
    email-obfuscation placeholder, which is not a loadable URL.
    NOTE(review): the version numbers below were restored from the shape of the
    garbled value (four "@" segments); confirm them against the repository copy.
  -->
  <script
    src="https://cdn.tailwindcss.com/3.4.5?plugins=forms@0.5.7,typography@0.5.13,aspect-ratio@0.4.2,container-queries@0.1.1"></script>
  <!-- Theme configuration consumed by the page's custom utility classes (e.g. text-custom, rounded-button). -->
  <script src="https://ai-public.creatie.ai/gen_page/tailwind-config.min.js" data-color="#000000"
    data-border-radius="small"></script>
</head>
<body class="font-[Poppins] bg-gray-50">
<main>
<section
class="relative bg-white overflow-hidden bg-[url('https://ai-public.creatie.ai/gen_page/subtle_grid_pattern.png')] bg-opacity-5">
<div class="max-w-8xl mx-auto">
<div class="relative z-10 pb-8 bg-transparent sm:pb-16 md:pb-20 lg:pb-28 lg:w-full">
<div class="mt-16 mx-auto max-w-7xl px-4 sm:mt-24 sm:px-6">
<div class="text-center">
<h1 class="text-4xl tracking-tight font-bold text-gray-900 sm:text-5xl md:text-6xl">
<span class="block text-4xl tracking-tight font-bold text-gray-900 sm:text-5xl md:text-6xl">Universal
Actions for Enhanced Embodied Foundation Models</span>
</h1>
<p class="mt-3 max-w-md mx-auto text-base text-gray-500 sm:text-lg md:mt-5 md:text-xl md:max-w-3xl"></p>
<div class="flex justify-center gap-4 mt-6">
<a href="https://arxiv.org/abs/2501.10105"
class="!rounded-button inline-flex items-center px-6 py-2 border border-custom text-base font-medium text-custom bg-white hover:bg-gray-50">
<i class="fas fa-file-alt mr-2"></i>Paper</a>
<a href="https://github.com/2toinf/UniAct"
class="!rounded-button inline-flex items-center px-6 py-2 border border-custom text-base font-medium text-custom bg-white hover:bg-gray-50">
<i class="fas fa-code mr-2"></i>Code</a>
<a href="#"
class="!rounded-button inline-flex items-center px-6 py-2 border border-custom text-base font-medium text-custom bg-white hover:bg-gray-50">
<i class="fas fa-cube mr-2"></i>Models</a>
</div>
<div
class="mt-3 max-w-4xl mx-auto text-base text-gray-600 sm:text-lg md:mt-5 md:text-xl order-last flex flex-col items-center">
<p class="mb-4">
<a href='#' class="hover:text-custom"> Jinliang Zheng </a> <sup>1,2,5,*</sup>,
Jianxiong Li<sup>1,*</sup>,
Dongxiu Liu<sup>4,1,*</sup>,
Yinan Zheng<sup>1</sup>,
Zhihao Wang<sup>3,1</sup>,
Zhonghong Ou<sup>1</sup>,
Yu Liu<sup>2</sup>,
Jingjing Liu<sup>1</sup>,
Ya-Qin Zhang<sup>1</sup>,
Xianyuan Zhan<sup>1,5,†</sup>
</p>
<p class="text-sm text-gray-500">
<sup>1</sup>AIR, Tsinghua University<br />
<sup>2</sup>SenseTime Research<br />
<sup>3</sup>Peking University<br />
<sup>4</sup>Beijing University of Posts and Telecommunications<br />
<sup>5</sup>Shanghai AI Lab<br />
<sup>*</sup>Equal contribution<br />
<sup>†</sup>Corresponding author
</p>
<div class="grid grid-cols-5 gap-8 items-center justify-items-center mt-8 w-full">
<img src="static/images/Tsinghua_University_Logo.png" alt="Tsinghua University"
class="h-16 object-contain hover:grayscale-0 transition-all" />
<img src="static/images/sensetime_logo.png" alt="SenseTime"
class="h-16 object-contain hover:grayscale-0 transition-all" />
<img src="static/images/peking_university.png" alt="Peking University"
class="h-16 object-contain hover:grayscale-0 transition-all" />
<img src="static/images/bupt.png" alt="BUPT"
class="h-16 object-contain hover:grayscale-0 transition-all" />
<img src="static/images/ShanghaiAI.png" alt="Shanghai AI Lab"
class="h-16 object-contain hover:grayscale-0 transition-all" />
</div>
</div>
<p></p>
<div class="grid grid-cols-1 lg:grid-cols-1 gap-8 mt-8 order-first">
<!-- <div class="lg:pr-8"><img src="static/images/robot.png" alt="UniAct Model" class="w-full rounded-lg shadow-lg" /></div> -->
<div class="lg:pl-8">
<p class="text-left text-gray-600">
Training on diverse, internet-scale data is a key factor in the success of recent large foundation
models.
Yet, using the same recipe for building embodied agents has faced noticeable difficulties.
Despite the availability of many crowd-sourced embodied datasets,
their action spaces often exhibit significant heterogeneity due to distinct physical embodiment and
control interfaces for different robots,
causing substantial challenges in developing embodied foundation models using cross-embodiment data.
In this paper, we introduce UniAct, a new embodied foundation modeling framework operating in the
Universal Action Space.
Our learned universal actions capture the generic behaviors across diverse robots by exploiting
their shared structural features,
and enable enhanced cross-domain data utilization and cross-embodiment generalizations by
eliminating the notorious heterogeneity.
Moreover, the universal actions can be efficiently translated back to heterogeneous actionable
commands by simply adding embodiment-specific details,
from which fast adaptation to new robots becomes simple and straightforward.
Our 0.5B instantiation of UniAct outperforms 14X larger SOTA embodied foundation models in extensive
evaluations on various real-world and simulation robots,
showcasing exceptional cross-embodiment control and adaptation capability, highlighting the crucial
benefit of adopting universal actions.</p>
</div>
</div>
<p></p>
</div>
</div>
</div>
</div>
</section>
<section class="py-16 bg-gray-50">
  <div class="max-w-8xl mx-auto px-4 sm:px-6 lg:px-8">
    <div class="text-center mb-12">
      <h2 class="text-3xl font-bold text-gray-900">Play with Code!</h2>
      <p class="text-gray-600 text-center mb-12 max-w-3xl mx-auto mt-4">Although universal actions represent highly
        abstract embodiment-agnostic intentions, we can observe fascinating consistencies in how these
        actions manifest across different embodiments. Try it yourself!</p>
    </div>
    <!--
      Action selector. The page script reads each button's data-group value and
      shows only the .video-container elements carrying the matching group-N class.
    -->
    <div class="flex justify-center gap-4 mb-8">
      <button type="button"
        class="px-6 py-2 bg-white border border-gray-300 rounded-lg shadow-sm hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-custom focus:ring-offset-2 video-switch-btn"
        data-group="1">Move Down</button>
      <button type="button"
        class="px-6 py-2 bg-white border border-gray-300 rounded-lg shadow-sm hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-custom focus:ring-offset-2 video-switch-btn"
        data-group="2">Move Upward</button>
      <button type="button"
        class="px-6 py-2 bg-white border border-gray-300 rounded-lg shadow-sm hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-custom focus:ring-offset-2 video-switch-btn"
        data-group="3">Move Right</button>
      <button type="button"
        class="px-6 py-2 bg-white border border-gray-300 rounded-lg shadow-sm hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-custom focus:ring-offset-2 video-switch-btn"
        data-group="4">Move Left</button>
      <button type="button"
        class="px-6 py-2 bg-white border border-gray-300 rounded-lg shadow-sm hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-custom focus:ring-offset-2 video-switch-btn"
        data-group="5">Close the gripper</button>
    </div>
    <!--
      One clip per robot for each action group. The file URL lives on <source>
      together with its MIME type (a <source> without src is invalid markup).
      Groups 2-5 start hidden through an inline display value because the page
      script toggles style.display directly.
    -->
    <div class="flex justify-center grid-cols-1 md:grid-cols-3 gap-8">
      <!-- Group 1: move down -->
      <div class="video-container group-1">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/down/widowx.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">WidowX Robot in real world</p>
      </div>
      <div class="video-container group-1">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/down/libero.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">Franka Robot in simulation</p>
      </div>
      <div class="video-container group-1">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/down/airbot.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">AIRBOT in real world-few shot adaptation</p>
      </div>
      <!-- Group 2: move upward -->
      <div class="video-container group-2" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/lift/widowx.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">WidowX Robot in real world</p>
      </div>
      <div class="video-container group-2" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/lift/libero.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">Franka Robot in simulation</p>
      </div>
      <div class="video-container group-2" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/lift/airbot.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">AIRBOT in real world-few shot adaptation</p>
      </div>
      <!-- Group 3: move right -->
      <div class="video-container group-3" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/right/widowx.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">WidowX Robot in real world</p>
      </div>
      <div class="video-container group-3" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/right/libero.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">Franka Robot in simulation</p>
      </div>
      <div class="video-container group-3" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/right/airbot.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">AIRBOT in real world-few shot adaptation</p>
      </div>
      <!-- Group 4: move left -->
      <div class="video-container group-4" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/left/widowx.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">WidowX Robot in real world</p>
      </div>
      <div class="video-container group-4" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/left/libero.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">Franka Robot in simulation</p>
      </div>
      <div class="video-container group-4" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/left/airbot.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">AIRBOT in real world-few shot adaptation</p>
      </div>
      <!-- Group 5: close the gripper -->
      <div class="video-container group-5" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/close/widowx.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">WidowX Robot in real world</p>
      </div>
      <div class="video-container group-5" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/close/libero.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">Franka Robot in simulation</p>
      </div>
      <div class="video-container group-5" style="display: none;">
        <video class="w-full aspect-video object-cover rounded-lg shadow-lg" controls autoplay loop>
          <source src="static/videos/code/close/airbot.mp4" type="video/mp4" />
        </video>
        <p class="mt-2 text-sm text-gray-600 text-center">AIRBOT in real world-few shot adaptation</p>
      </div>
    </div>
  </div>
</section>
<section class="py-16 bg-white">
  <div class="max-w-8xl mx-auto px-4 sm:px-6 lg:px-8">
    <div class="text-center mb-8">
      <h2 class="text-3xl font-bold text-gray-900">Generalized Cross-Embodiment Policy in Universal Action Space</h2>
      <p class="mt-4 text-lg text-gray-500">Explore the potential of universal action in more embodiments!</p>
    </div>
    <!--
      Embodiment cards. The page script collects the buttons inside #slider-dots
      in document order and uses each button's position (not its data-slide
      value) to pick the slide to jump to.
    -->
    <div class="flex justify-center md:grid-cols-2 lg:grid-cols-4 gap-8 mb-8" id="slider-dots">
      <div
        class="flex flex-col items-center bg-white p-6 rounded-xl shadow-md hover:shadow-xl transition-shadow h-full">
        <img src="static/images/widowx.png" alt="WidowX robot"
          class="w-full h-64 object-cover rounded-xl shadow-lg mb-4 hover:transform hover:scale-105 transition-transform duration-300" />
        <button type="button"
          class="px-6 py-2 bg-primary-600 text-white rounded-full hover:bg-primary-700 transition-colors font-medium shadow-md hover:shadow-lg mt-auto" data-slide="0">
          Watch Demo!
        </button>
      </div>
      <div
        class="flex flex-col items-center bg-white p-6 rounded-xl shadow-md hover:shadow-xl transition-shadow h-full">
        <img src="static/images/franka.png" alt="Franka robot"
          class="w-full h-64 object-cover rounded-xl shadow-lg mb-4 hover:transform hover:scale-105 transition-transform duration-300" />
        <button type="button"
          class="px-6 py-2 bg-primary-600 text-white rounded-full hover:bg-primary-700 transition-colors font-medium shadow-md hover:shadow-lg mt-auto" data-slide="3">
          Watch Demo!
        </button>
      </div>
      <div
        class="flex flex-col items-center bg-white p-6 rounded-xl shadow-md hover:shadow-xl transition-shadow h-full">
        <img src="static/images/airbot.png" alt="AIRBOT robot"
          class="w-full h-64 object-cover rounded-xl shadow-lg mb-4 hover:transform hover:scale-105 transition-transform duration-300" />
        <button type="button"
          class="px-6 py-2 bg-primary-600 text-white rounded-full hover:bg-primary-700 transition-colors font-medium shadow-md hover:shadow-lg mt-auto" data-slide="12">
          Watch Demo!
        </button>
      </div>
      <div
        class="flex flex-col items-center bg-white p-6 rounded-xl shadow-md hover:shadow-xl transition-shadow h-full">
        <img src="static/images/airbot_dual.png" alt="Dual-arm AIRBOT robot"
          class="w-full h-64 object-cover rounded-xl shadow-lg mb-4 hover:transform hover:scale-105 transition-transform duration-300" />
        <button type="button"
          class="px-6 py-2 bg-primary-600 text-white rounded-full hover:bg-primary-700 transition-colors font-medium shadow-md hover:shadow-lg mt-auto" data-slide="17">
          Watch Demo!
        </button>
      </div>
      <div
        class="flex flex-col items-center bg-white p-6 rounded-xl shadow-md hover:shadow-xl transition-shadow h-full">
        <img src="static/images/coming_soon.png" alt="More embodiments coming soon"
          class="w-full h-64 object-cover rounded-xl shadow-lg mb-4 hover:transform hover:scale-105 transition-transform duration-300" />
      </div>
    </div>
    <div class=" relative">
      <div class="flex justify-center overflow-hidden relative">
        <!--
          Slide track. The page script treats every element child of
          #slider-container as one slide, so only slide wrappers may be added here.
        -->
        <div class="flex gap-4 transition-transform duration-500 ease-in-out transform w-full" id="slider-container"
          style="display: flex; gap: 1rem; transition: transform 0.5s cubic-bezier(0.4, 0, 0.2, 1); touch-action: pan-y pinch-zoom;">
          <!-- WidowX clips: slide indexes 0-5 -->
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/widowx/1.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="WidowX robot demo: put the eggplant into the pot" />
              <div class="absolute bottom-0 left-0 right-0 bg-black bg-opacity-50 p-4 text-white rounded-b-lg">
                <h3 class="text-lg font-semibold">put the eggplant into the pot</h3>
              </div>
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/widowx/6.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="WidowX robot demo: flip the pot" />
              <div class="absolute bottom-0 left-0 right-0 bg-black bg-opacity-50 p-4 text-white rounded-b-lg">
                <h3 class="text-lg font-semibold">flip the pot</h3>
              </div>
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/widowx/7.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="WidowX robot demo: lift AAA battery" />
              <div class="absolute bottom-0 left-0 right-0 bg-black bg-opacity-50 p-4 text-white rounded-b-lg">
                <h3 class="text-lg font-semibold">lift AAA battery</h3>
              </div>
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/widowx/8.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="WidowX robot demo: put the toy man in the sink" />
              <div class="absolute bottom-0 left-0 right-0 bg-black bg-opacity-50 p-4 text-white rounded-b-lg">
                <h3 class="text-lg font-semibold">Put the toy man in the sink</h3>
              </div>
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/widowx/9.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="WidowX robot demo: put the corn in the pot" />
              <div class="absolute bottom-0 left-0 right-0 bg-black bg-opacity-50 p-4 text-white rounded-b-lg">
                <h3 class="text-lg font-semibold">Put the corn in the pot</h3>
              </div>
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/widowx/10.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="WidowX robot demo: put the eggplant in the pot" />
              <div class="absolute bottom-0 left-0 right-0 bg-black bg-opacity-50 p-4 text-white rounded-b-lg">
                <h3 class="text-lg font-semibold">Put the eggplant in the pot</h3>
              </div>
            </div>
          </div>
          <!-- LIBERO (Franka, simulation) clips: slide indexes 6-10 -->
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/libero/1.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="Franka robot in simulation (LIBERO), demo 1" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/libero/2.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="Franka robot in simulation (LIBERO), demo 2" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/libero/3.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="Franka robot in simulation (LIBERO), demo 3" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/libero/4.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="Franka robot in simulation (LIBERO), demo 4" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/libero/6.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="Franka robot in simulation (LIBERO), demo 6" />
            </div>
          </div>
          <!-- AIRBOT clips: slide indexes 11-16 -->
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/airbot/1.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="AIRBOT demo 1" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/airbot/2.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="AIRBOT demo 2" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/airbot/5.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="AIRBOT demo 5" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/airbot/3.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="AIRBOT demo 3" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/airbot/4.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="AIRBOT demo 4" />
            </div>
          </div>
          <div class="min-w-[calc(25%-1rem)] flex-shrink-0">
            <div class="relative"><img src="static/videos/airbot/6.gif"
                class="w-full h-[400px] object-cover rounded-lg" alt="AIRBOT demo 6" />
            </div>
          </div>
        </div>
      </div>
      <!-- Icon-only arrows: the aria-label supplies the accessible name, the icon itself is decorative. -->
      <button type="button" aria-label="Previous slide"
        class="absolute left-4 top-1/2 transform -translate-y-1/2 bg-white p-2 rounded-full shadow-lg hover:bg-gray-100"
        onclick="slideLeft()">
        <i class="fas fa-chevron-left text-gray-600" aria-hidden="true"></i></button>
      <button type="button" aria-label="Next slide"
        class="absolute right-4 top-1/2 transform -translate-y-1/2 bg-white p-2 rounded-full shadow-lg hover:bg-gray-100"
        onclick="slideRight()">
        <i class="fas fa-chevron-right text-gray-600" aria-hidden="true"></i></button>
    </div>
  </div>
</section>
<section class="py-16 bg-gray-50">
  <div class="max-w-8xl mx-auto px-4 sm:px-6 lg:px-8">
    <div class="text-center">
      <h2 class="text-3xl font-bold text-gray-900">Methods</h2>
      <p class="mt-4 text-lg text-gray-500">Our approach consists of the following key components</p>
    </div>
    <!-- Single-column grid: the card takes the one available column (no column span is needed). -->
    <div class="mt-12 grid grid-cols-1 gap-8">
      <div class="bg-white rounded-lg shadow-sm overflow-hidden">
        <div class="p-6 flex flex-col items-center">
          <div class="max-w-4xl bg-gray-100 rounded-lg overflow-hidden mb-6">
            <!-- NOTE(review): alt text derived from the card title below; confirm it matches the figure. -->
            <img src="static/images/intro_final.jpg" alt="Overview figure of the Universal Action Space"
              class="w-full object-cover" />
          </div>
          <h3 class="mt-4 text-lg font-medium text-gray-900">Universal Action Space</h3>
          <p class="mt-2 text-sm text-gray-500">Our novel approach to unifying robot control across different
            embodiments through a shared action representation</p>
        </div>
      </div>
    </div>
  </div>
</section>
<section class="py-16 bg-white" id="results-section">
<div class="max-w-8xl mx-auto px-4 sm:px-6 lg:px-8">
<div class="text-center mb-12">
<h2 class="text-3xl font-bold text-gray-900">Experimental Results</h2>
<p class="mt-4 text-lg text-gray-500">Comprehensive evaluation across different platforms and tasks</p>
</div>
<div class="space-y-12">
<div class="overflow-x-auto">
<h3 class="text-xl font-semibold mb-4">Results on WidowX (Real World)</h3>
<table class="min-w-full divide-y divide-gray-200">
<thead class="bg-gray-50">
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Model</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Visual Generalization </th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Motion Generalization </th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Physical Generalization
</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Semantic Generalization
</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Language Grounding
</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Average </th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
<tr>
<td class="px-6 py-4 whitespace-nowrap">Octo</td>
<td class="px-6 py-4">24.0</td>
<td class="px-6 py-4">30.0</td>
<td class="px-6 py-4">10.0</td>
<td class="px-6 py-4">19.4</td>
<td class="px-6 py-4">48.3</td>
<td class="px-6 py-4">28.9</td>
</tr>
<tr>
<td class="px-6 py-4 whitespace-nowrap">OpenVLA-7B</td>
<td class="px-6 py-4">66.0</td>
<td class="px-6 py-4">35.0</td>
<td class="px-6 py-4">60.0</td>
<td class="px-6 py-4">48.1</td>
<td class="px-6 py-4">88.3</td>
<td class="px-6 py-4">65.1</td>
</tr>
<tr>
<td class="px-6 py-4 whitespace-nowrap font-semibold">UniAct-0.5B(Ours)</td>
<td class="px-6 py-4 font-semibold text-custom">69.0</td>
<td class="px-6 py-4 font-semibold text-custom">66.0</td>
<td class="px-6 py-4 font-semibold text-custom">70.0</td>
<td class="px-6 py-4 font-semibold text-custom">38.8</td>
<td class="px-6 py-4 font-semibold text-custom">73.3</td>
<td class="px-6 py-4 font-semibold text-custom">63.3</td>
</tr>
</tbody>
</table>
</div>
<div class="overflow-x-auto">
<h3 class="text-xl font-semibold mb-4">Results on Franka (Simulation)</h3>
<table class="min-w-full divide-y divide-gray-200">
<thead class="bg-gray-50">
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Model</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">LIBERO-Spatial</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">LIBERO-Object</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">LIBERO-Goal</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">LIBERO-Long</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">LIBERO-90</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Average</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
<tr>
<td class="px-6 py-4 whitespace-nowrap">Octo</td>
<td class="px-6 py-4">17.5</td>
<td class="px-6 py-4">12.0</td>
<td class="px-6 py-4">36.5</td>
<td class="px-6 py-4">20.0</td>
<td class="px-6 py-4">32.5</td>
<td class="px-6 py-4">27.7</td>
</tr>
<tr>
<td class="px-6 py-4 whitespace-nowrap">OpenVLA-7B</td>
<td class="px-6 py-4">43.0</td>
<td class="px-6 py-4">66.5</td>
<td class="px-6 py-4">54.5</td>
<td class="px-6 py-4">13.0</td>
<td class="px-6 py-4">44.1</td>
<td class="px-6 py-4">44.1</td>
</tr>
<tr>
<td class="px-6 py-4 whitespace-nowrap font-semibold">UniAct-0.5B(Ours)</td>
<td class="px-6 py-4 font-semibold text-custom">64.5</td>
<td class="px-6 py-4 font-semibold text-custom">77.5</td>
<td class="px-6 py-4 font-semibold text-custom">68.0</td>
<td class="px-6 py-4 font-semibold text-custom">46.5</td>
<td class="px-6 py-4 font-semibold text-custom">60.0</td>
<td class="px-6 py-4 font-semibold text-custom">61.3</td>
</tr>
</tbody>
</table>
</div>
<div class="overflow-x-auto">
<h3 class="text-xl font-semibold mb-4">Results on AIRBOT with different control interfaces (Fast Adaptation)</h3>
<table class="min-w-full divide-y divide-gray-200">
<thead class="bg-gray-50">
<tr>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Model</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Relative EEF Position(easy/hard task)</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Relative Joint Position(easy/hard task)</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Absolute EEF Position(easy/hard task)</th>
<th class="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Absolute Joint Position(easy/hard task)</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200">
<tr>
<td class="px-6 py-4 whitespace-nowrap">Octo</td>
<td class="px-6 py-4">5.0 / 12.5</td>
<td class="px-6 py-4">22.5 / 17.5</td>
<td class="px-6 py-4">2.5 / 0.0</td>
<td class="px-6 py-4">0.0 / 0.0</td>
</tr>
<tr>
<td class="px-6 py-4 whitespace-nowrap">OpenVLA-7B</td>
<td class="px-6 py-4">32.5 / 22.5</td>
<td class="px-6 py-4">17.5 / 2.5</td>
<td class="px-6 py-4">32.5 / 15.0</td>
<td class="px-6 py-4">2.5 / 7.5</td>
</tr>
<tr>
<td class="px-6 py-4 whitespace-nowrap font-semibold">UniAct-0.5B(Ours)</td>
<td class="px-6 py-4 font-semibold text-custom">57.5 / 40.0</td>
<td class="px-6 py-4 font-semibold text-custom">47.5 / 45.0</td>
<td class="px-6 py-4 font-semibold text-custom">52.5 / 30.0</td>
<td class="px-6 py-4 font-semibold text-custom">70.0 / 30.0</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</section>
</main>
<script>let currentSlide = 0; // index of the slide placed at the left edge of the track
// Group boundaries for the buttons inside #slider-dots: button i jumps to slide
// slide_length[i] and is highlighted while
// slide_length[i] <= currentSlide < slide_length[i + 1].
// NOTE(review): the original marker here read "zjl revise here". The slider
// markup in this file holds 17 slides (indexes 0-5 widowx, 6-10 libero,
// 11-16 airbot), so the boundaries 10, 16 and 20 do not line up with those
// groups - confirm the intended values.
const slide_length = [0, 6, 10, 16, 20]
// Track element that is shifted with translateX; its element children are the slides.
const slideContainer = document.getElementById('slider-container');
const slides = slideContainer.children;
// Every <button> inside #slider-dots, in document order.
const dots = document.querySelectorAll('#slider-dots button');
// Slide count captured once when the script runs.
const totalSlides = slides.length;
/**
 * Shifts the slide track so that slide `currentSlide` sits at its left edge
 * and recolours the group buttons to reflect the active group.
 *
 * @param {boolean} [animate=true] When false the track jumps without a
 *   transition (used for the initial layout and after a resize).
 */
function updateSlider(animate = true) {
  // Nothing to measure or move when the track has no slides.
  if (slides.length === 0) return;

  // Step = slide width plus the 16px added for the gap between slides
  // (the track declares `gap: 1rem`; assumes a 16px root font size).
  const slideWidth = slides[0].offsetWidth + 16;
  const offset = currentSlide * slideWidth;

  slideContainer.style.transition = animate
    ? 'transform 0.5s cubic-bezier(0.4, 0, 0.2, 1)'
    : 'none';
  slideContainer.style.transform = `translateX(-${offset}px)`;

  dots.forEach((dot, index) => {
    // Logical AND (the previous bitwise `&` only worked through number coercion).
    const isActiveGroup =
      slide_length[index] <= currentSlide && currentSlide < slide_length[index + 1];
    dot.style.backgroundColor = isActiveGroup ? '#4F46E5' : '#D1D5DB';
  });
}
/**
 * Steps one slide towards the start. When already on the first slide the
 * track is nudged 20px to the right and returned after 150ms as a bounce cue.
 */
function slideLeft() {
  const canStepBack = currentSlide > 0;
  if (!canStepBack) {
    // At the start: bounce instead of moving.
    slideContainer.style.transform = `translateX(20px)`;
    setTimeout(() => {
      slideContainer.style.transform = `translateX(0px)`;
    }, 150);
    return;
  }

  currentSlide--;
  updateSlider();
}
/**
 * Steps one slide towards the end. When already on the last slide the track
 * is pulled 20px back from its resting offset and restored after 150ms as a
 * bounce cue.
 */
function slideRight() {
  const canStepForward = currentSlide < totalSlides - 1;
  if (canStepForward) {
    currentSlide++;
    updateSlider();
    return;
  }

  // At the end: bounce around the offset of the last slide.
  const restingOffset = (totalSlides - 1) * (slides[0].offsetWidth + 16);
  slideContainer.style.transform = `translateX(-${restingOffset - 20}px)`;
  setTimeout(() => {
    slideContainer.style.transform = `translateX(-${restingOffset}px)`;
  }, 150);
}
/**
 * touchstart handler: stores the first touch point's coordinates on `window`
 * and switches the track transition off so it can follow the finger directly.
 *
 * @param {TouchEvent} e
 */
function handleTouchStart(e) {
  const { clientX, clientY } = e.touches[0];
  window.touchStartX = clientX;
  window.touchStartY = clientY;
  slideContainer.style.transition = 'none';
}
/**
 * touchmove handler: drags the track horizontally with the finger, relative
 * to the resting offset of the current slide.
 *
 * @param {TouchEvent} e
 */
function handleTouchMove(e) {
  // Compare against null/undefined rather than falsiness: a gesture that
  // starts at clientX === 0 is still a valid gesture.
  if (window.touchStartX == null) return;

  const touch = e.touches[0];
  const diffX = touch.clientX - window.touchStartX;
  const diffY = touch.clientY - window.touchStartY;

  // If vertical scrolling is dominant, don't slide
  if (Math.abs(diffY) > Math.abs(diffX)) return;

  // Only cancel the default action when the browser still allows it.
  if (e.cancelable) e.preventDefault();

  const slideWidth = slides[0].offsetWidth + 16;
  const currentOffset = -(currentSlide * slideWidth);
  const newOffset = currentOffset + diffX;
  slideContainer.style.transform = `translateX(${newOffset}px)`;
}
/**
 * touchend handler: a horizontal travel of more than 50px changes slide in
 * the swipe direction (when a slide exists on that side); anything else snaps
 * the track back to the current slide. The stored touch origin is then cleared.
 *
 * @param {TouchEvent} e
 */
function handleTouchEnd(e) {
  // Compare against null/undefined rather than falsiness: a gesture that
  // starts at clientX === 0 must still be finished and cleaned up.
  if (window.touchStartX == null) return;

  const touch = e.changedTouches[0];
  const diffX = touch.clientX - window.touchStartX;
  const isSwipe = Math.abs(diffX) > 50;

  if (isSwipe && diffX > 0 && currentSlide > 0) {
    slideLeft();
  } else if (isSwipe && diffX < 0 && currentSlide < totalSlides - 1) {
    slideRight();
  } else {
    // Too short, or no slide in that direction: restore the resting position.
    updateSlider();
  }

  window.touchStartX = null;
  window.touchStartY = null;
}
// Touch gestures on the track: start records the origin, move drags the
// track, end either changes slide or snaps back.
slideContainer.addEventListener('touchstart', handleTouchStart);
slideContainer.addEventListener('touchmove', handleTouchMove);
slideContainer.addEventListener('touchend', handleTouchEnd);
// Optional click wiring for arrow buttons looked up by id.
// NOTE(review): no element with id "id-88" or "id-90" appears in the markup of
// this file (the arrow buttons call slideLeft()/slideRight() through inline
// onclick attributes), so both lookups are expected to return null and the
// guarded calls below are skipped - confirm before removing.
const leftButton = document.getElementById('id-88');
const rightButton = document.getElementById('id-90');
if (leftButton) leftButton.addEventListener('click', slideLeft);
if (rightButton) rightButton.addEventListener('click', slideRight);
// Group buttons: clicking button i jumps to the slide index stored in slide_length[i].
dots.forEach((dot, index) => {
dot.addEventListener('click', () => {
currentSlide = slide_length[index];
updateSlider();
});
});
// Initial placement once the DOM is ready, without animation.
document.addEventListener('DOMContentLoaded', () => {
updateSlider(false);
});
// Re-apply the offset after the window is resized (slide width may have
// changed); the timer waits for 100ms without further resize events first.
let resizeTimeout;
window.addEventListener('resize', () => {
clearTimeout(resizeTimeout);
resizeTimeout = setTimeout(() => updateSlider(false), 100);
});</script>
<script>
document.addEventListener('DOMContentLoaded', function () {
const videoContainers = document.querySelectorAll('.video-container');
const buttons = document.querySelectorAll('.video-switch-btn');
function showVideoGroup(groupNumber) { videoContainers.forEach(container => { if (container.classList.contains(`group-${groupNumber}`)) { container.style.display = 'block'; } else { container.style.display = 'none'; } }); buttons.forEach(btn => { if (btn.dataset.group === groupNumber.toString()) { btn.classList.add('bg-custom', 'text-white'); btn.classList.remove('bg-white'); } else { btn.classList.remove('bg-custom', 'text-white'); btn.classList.add('bg-white'); } }); } buttons.forEach(btn => { btn.addEventListener('click', () => { showVideoGroup(parseInt(btn.dataset.group)); }); }); showVideoGroup(1);
});</script>