Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhang Hao authored and Zhang Hao committed Mar 28, 2022
1 parent e1ae970 commit dd1ff77
Show file tree
Hide file tree
Showing 21 changed files with 117 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@article{woo2022explore,
  title         = {Explore and Match: End-to-End Video Grounding with Transformer},
  author        = {Woo, Sangmin and Park, Jinyoung and Koo, Inyong and Lee, Sumin and Jeong, Minki and Kim, Changick},
  journal       = {arXiv preprint arXiv:2201.10168},
  eprint        = {2201.10168},
  archiveprefix = {arXiv},
  year          = {2022},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@inproceedings{wang2021finegrained,
  title     = {Fine-grained Semantic Alignment Network for Weakly Supervised Temporal Language Grounding},
  author    = {Wang, Yuechen and Zhou, Wengang and Li, Houqiang},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  month     = nov,
  year      = {2021},
  address   = {Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.findings-emnlp.9},
  doi       = {10.18653/v1/2021.findings-emnlp.9},
  pages     = {89--99},
}
6 changes: 6 additions & 0 deletions Bibtex/GFlowNet Foundations.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@article{bengio2021gflownet,
  title         = {{GFlowNet} Foundations},
  author        = {Bengio, Yoshua and Deleu, Tristan and Hu, Edward J. and Lahlou, Salem and Tiwari, Mo and Bengio, Emmanuel},
  journal       = {arXiv preprint arXiv:2111.09266},
  eprint        = {2111.09266},
  archiveprefix = {arXiv},
  year          = {2021},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@inproceedings{li2021improving,
  title     = {Improving the Efficiency and Effectiveness for {BERT}-based Entity Resolution},
  author    = {Li, Bing and Miao, Yukai and Wang, Yaoshu and Sun, Yifang and Wang, Wei},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {35},
  number    = {15},
  pages     = {13226--13233},
  month     = may,
  year      = {2021},
  url       = {https://ojs.aaai.org/index.php/AAAI/article/view/17562},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@inproceedings{sariyildiz2020learning,
  author       = {Sariyildiz, Mert Bulent and Perez, Julien and Larlus, Diane},
  title        = {Learning visual representations with caption annotations},
  booktitle    = {European Conference on Computer Vision},
  year         = {2020},
  pages        = {153--170},
  organization = {Springer},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@article{soldan2021mad,
  title         = {{MAD}: A Scalable Dataset for Language Grounding in Videos from Movie Audio Descriptions},
  author        = {Soldan, Mattia and Pardo, Alejandro and Alc{\'a}zar, Juan Le{\'o}n and Heilbron, Fabian Caba and Zhao, Chen and Giancola, Silvio and Ghanem, Bernard},
  journal       = {arXiv preprint arXiv:2112.00431},
  eprint        = {2112.00431},
  archiveprefix = {arXiv},
  year          = {2021},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@inproceedings{zhang2021rstnet,
  author    = {Zhang, Xuying and Sun, Xiaoshuai and Luo, Yunpeng and Ji, Jiayi and Zhou, Yiyi and Wu, Yongjian and Huang, Feiyue and Ji, Rongrong},
  title     = {{RSTNet}: Captioning With Adaptive Attention on Visual and Non-Visual Words},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2021},
  pages     = {15465--15474},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@article{jia2022stcmnet,
  title   = {{STCM-Net}: A symmetrical one-stage network for temporal language localization in videos},
  author  = {Jia, Zixi and Dong, Minglin and Ru, Jingyu and Xue, Lele and Yang, Sikai and Li, Chunbo},
  journal = {Neurocomputing},
  volume  = {471},
  pages   = {194--207},
  year    = {2022},
  issn    = {0925-2312},
  doi     = {10.1016/j.neucom.2021.11.019},
  url     = {https://www.sciencedirect.com/science/article/pii/S0925231221016945},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@inproceedings{liu2022unsupervised,
  author    = {Liu, Daizong and Qu, Xiaoye and Wang, Yinzhen and Di, Xing and Zou, Kai and Cheng, Yu and Xu, Zichuan and Zhou, Pan},
  title     = {Unsupervised Temporal Video Grounding with Deep Semantic Clustering},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2022},
}
9 changes: 4 additions & 5 deletions Bibtex/VIVO.bib
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
@article{hu2020vivo,
title={VIVO: Surpassing Human Performance in Novel Object Captioning with Visual Vocabulary Pre-Training},
@inproceedings{hu2021vivo,
title={VIVO: Visual Vocabulary Pre-Training for Novel Object Captioning},
author={Hu, Xiaowei and Yin, Xi and Lin, Kevin and Wang, Lijuan and Zhang, Lei and Gao, Jianfeng and Liu, Zicheng},
journal={arXiv preprint arXiv:2009.13682},
year={2020},
url={https://arxiv.org/pdf/2009.13682.pdf}
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
year={2021},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@inproceedings{desai2021virtex,
  title     = {{VirTex}: Learning Visual Representations from Textual Annotations},
  author    = {Desai, Karan and Johnson, Justin},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition},
  pages     = {11162--11173},
  year      = {2021},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@inproceedings{wang2021visual,
  author    = {Wang, Zheng and Chen, Jingjing and Jiang, Yu-Gang},
  title     = {Visual Co-Occurrence Alignment Learning for Weakly-Supervised Video Moment Retrieval},
  year      = {2021},
  isbn      = {9781450386517},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3474085.3475278},
  url       = {https://doi.org/10.1145/3474085.3475278},
  booktitle = {Proceedings of the 29th ACM International Conference on Multimedia},
  pages     = {1459--1468},
  numpages  = {10},
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
2. [Char/Word Embeddings, Sentence Representation and Natural Language Understanding](/readme/nlp/emb_sent_nlu.md)
3. [Machine Translation and Language Transfer, Generation, Summarization and Dialogue](/readme/nlp/machine_translation.md)
4. [Machine Comprehension and Question Answering](/readme/nlp/machine_comprehension.md)
5. [Sequence Labeling (POS, NER, SRL, RE, IE, IR, Parsing, EL, Coref, Discourse, etc)](/readme/nlp/sequence_labeling.md)
5. [Sequence Labeling and Information Retrieval (POS, NER, SRL, RE, IE, IR, Parsing, EL, Coref, Discourse, etc)](/readme/nlp/sequence_labeling.md)
6. [Sentiment Analysis and Text Classification](/readme/nlp/classification.md)
7. [Interpretability and Disambiguation](/readme/nlp/interpretability.md)
8. [Other NLP Research Works](/readme/nlp/others.md)
Expand Down
3 changes: 2 additions & 1 deletion readme/GAN_AE_NF.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@
- [2017 ICLR] **Density Estimation using Real NVP**, [[paper]](https://openreview.net/pdf?id=HkpbnH9lx), [[bibtex]](/Bibtex/Density%20Estimation%20using%20Real%20NVP.bib), sources: [[chrischute/real-nvp]](https://github.com/chrischute/real-nvp), [[xqding/RealNVP]](https://github.com/xqding/RealNVP).
- [2017 NeurIPS] **The Reversible Residual Network: Backpropagation Without Storing Activations**, [[paper]](https://papers.nips.cc/paper/2017/file/f9be311e65d81a9ad8150a60844bb94c-Paper.pdf), [[bibtex]](/Bibtex/The%20Reversible%20Residual%20Network.bib), sources: [[renmengye/revnet-public]](https://github.com/renmengye/revnet-public).
- [2018 NeurIPS] **Glow: Generative Flow with Invertible 1×1 Convolutions**, [[paper]](https://papers.nips.cc/paper/2018/file/d139db6a236200b21cc7f752979132d0-Paper.pdf), [[bibtex]](/Bibtex/Glow.bib), sources: [[openai/glow]](https://github.com/openai/glow), [[chaiyujin/glow-pytorch]](https://github.com/chaiyujin/glow-pytorch), [[rosinality/glow-pytorch]](https://github.com/rosinality/glow-pytorch), [[samuelmat19/GLOW-tf2]](https://github.com/samuelmat19/GLOW-tf2).
- [2020 TPAMI] **Normalizing Flows: An Introduction and Review of Current Methods**, [[paper]](https://arxiv.org/pdf/1908.09257v4.pdf), [[ArXiv v1]](https://arxiv.org/pdf/1908.09257v1.pdf), [[bibtex]](/Bibtex/Normalizing%20Flows.bib).
- [2020 TPAMI] **Normalizing Flows: An Introduction and Review of Current Methods**, [[paper]](https://arxiv.org/pdf/1908.09257v4.pdf), [[ArXiv v1]](https://arxiv.org/pdf/1908.09257v1.pdf), [[bibtex]](/Bibtex/Normalizing%20Flows.bib).
- [2021 ArXiv] **GFlowNet Foundations**, [[paper]](https://arxiv.org/pdf/2111.09266.pdf), [[bibtex]](/Bibtex/GFlowNet%20Foundations.bib).
1 change: 1 addition & 0 deletions readme/grounding/image/retrieval_captioning.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- [2020 ECCV] **Finding It at Another Side: A Viewpoint-Adapted Matching Encoder for Change Captioning**, [[paper]](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123590562.pdf), [[bibtex]](/Bibtex/Finding%20It%20at%20Another%20Side%20-%20A%20Viewpoint-Adapted%20Matching%20Encoder%20for%20Change%20Captioning.bib).
- [2020 TPAMI] **Auto-encoding and Distilling Scene Graphs for Image Captioning**, [[paper]](/Documents/Papers/Auto-encoding%20and%20Distilling%20Scene%20Graphs%20for%20Image%20Captioning.pdf), [[bibtex]](/Bibtex/Auto-encoding%20and%20Distilling%20Scene%20Graphs%20for%20Image%20Captioning.bib), sources: [[yangxuntu/SGAE]](https://github.com/yangxuntu/SGAE).
- [2021 CVPR] **Causal Attention for Vision-Language Tasks**, [[paper]](https://openaccess.thecvf.com/content/CVPR2021/papers/Yang_Causal_Attention_for_Vision-Language_Tasks_CVPR_2021_paper.pdf), [[bibtex]](/Bibtex/Causal%20Attention%20for%20Vision-Language%20Tasks.bib), [[supplementary]](https://openaccess.thecvf.com/content/CVPR2021/supplemental/Yang_Causal_Attention_for_CVPR_2021_supplemental.pdf), sources: [[yangxuntu/lxmertcatt]](https://github.com/yangxuntu/lxmertcatt).
- [2021 CVPR] **RSTNet: Captioning with Adaptive Attention on Visual and Non-Visual Words**, [[paper]](https://openaccess.thecvf.com/content/CVPR2021/papers/Zhang_RSTNet_Captioning_With_Adaptive_Attention_on_Visual_and_Non-Visual_Words_CVPR_2021_paper.pdf), [[bibtex]](/Bibtex/RSTNet%20-%20Captioning%20with%20Adaptive%20Attention%20on%20Visual%20and%20Non-Visual%20Words.bib), sources: [[zhangxuying1004/RSTNet]](https://github.com/zhangxuying1004/RSTNet).

## Text-based Image Edit
- [2019 ACL] **Expressing Visual Relationships via Language**, [[paper]](https://www.aclweb.org/anthology/P19-1182.pdf), [[bibtex]](/Bibtex/Expressing%20Visual%20Relationships%20via%20Language.bib), sources: [[airsplay/VisualRelationships]](https://github.com/airsplay/VisualRelationships).
6 changes: 4 additions & 2 deletions readme/grounding/image/vision_language.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
- [2020 ICLR] **VL-BERT: Pre-training of Generic Visual-Linguistic Representations**, [[paper]](https://openreview.net/pdf?id=SygXPaEYvH), [[bibtex]](/Bibtex/VL-BERT%20-%20Pre-training%20of%20Generic%20Visual-Linguistic%20Representations.bib), sources: [[jackroos/VL-BERT]](https://github.com/jackroos/VL-BERT).
- [2020 ICLR] **Variational Hetero-Encoder Randomized GANs for Joint Image-Text Modeling**, [[paper]](https://openreview.net/pdf?id=H1x5wRVtvS), [[bibtex]](/Bibtex/Variational%20Hetero-Encoder%20Randomized%20GANs%20for%20Joint%20Image-Text%20Modeling.bib).
- [2020 ECCV] **Oscar: Object-Semantics Aligned Pre-training for Vision-Language Tasks**, [[paper]](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123750120.pdf), [[bibtex]](/Bibtex/Oscar.bib), sources: [[microsoft/Oscar]](https://github.com/microsoft/Oscar).
- [2020 ECCV] **Learning Visual Representations with Caption Annotations**, [[paper]](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123530154.pdf), [[bibtex]](/Bibtex/Learning%20Visual%20Representations%20with%20Caption%20Annotations.bib), [[homepage]](https://europe.naverlabs.com/research/computer-vision/icmlm/).
- [2020 ArXiv] **Pixel-BERT: Aligning Image Pixels with Text by Deep Multi-Modal Transformers**, [[paper]](https://arxiv.org/pdf/2004.00849.pdf), [[bibtex]](/Bibtex/Pixel-BERT.bib).
- [2020 ArXiv] **VIVO: Surpassing Human Performance in Novel Object Captioning with Visual Vocabulary Pre-Training**, [[paper]](https://arxiv.org/pdf/2009.13682.pdf), [[bibtex]](/Bibtex/VIVO.bib).
- [2020 ArXiv] **ImageBERT: Cross-modal Pre-training with Large-scale Weak-supervised Image-Text Data**, [[paper]](https://arxiv.org/pdf/2001.07966.pdf), [[bibtex]](/Bibtex/ImageBERT%20-%20Cross-modal%20Pre-training%20with%20Large-scale%20Weak-supervised%20Image-Text%20Data.bib).
- [2020 ArXiv] **Contrastive Learning of Medical Visual Representations from Paired Images and Text**, [[paper]](https://arxiv.org/pdf/2010.00747.pdf), [[bibtex]](/Bibtex/Contrastive%20Learning%20of%20Medical%20Visual%20Representations%20from%20Paired%20Images%20and%20Text.bib).
- [2020 ArXiv] **Contrastive Learning of Medical Visual Representations from Paired Images and Text**, [[paper]](https://arxiv.org/pdf/2010.00747.pdf), [[bibtex]](/Bibtex/Contrastive%20Learning%20of%20Medical%20Visual%20Representations%20from%20Paired%20Images%20and%20Text.bib), sources: [[edreisMD/ConVIRT-pytorch]](https://github.com/edreisMD/ConVIRT-pytorch).
- [2021 ArXiv] **SemVLP: Vision-Language Pre-training by Aligning Semantics at Multiple Levels**, [[paper]](https://arxiv.org/pdf/2103.07829.pdf), [[bibtex]](/Bibtex/SemVLP%20-%20Vision-Language%20Pre-training%20by%20Aligning%20Semantics%20at%20Multiple%20Levels.bib).
- [2021 AAAI] **VIVO: Visual Vocabulary Pre-Training for Novel Object Captioning**, [[paper]](https://arxiv.org/pdf/2009.13682.pdf), [[bibtex]](/Bibtex/VIVO.bib).
- [2021 CVPR] **Causal Attention for Vision-Language Tasks**, [[paper]](https://openaccess.thecvf.com/content/CVPR2021/papers/Yang_Causal_Attention_for_Vision-Language_Tasks_CVPR_2021_paper.pdf), [[bibtex]](/Bibtex/Causal%20Attention%20for%20Vision-Language%20Tasks.bib), [[supplementary]](https://openaccess.thecvf.com/content/CVPR2021/supplemental/Yang_Causal_Attention_for_CVPR_2021_supplemental.pdf), sources: [[yangxuntu/lxmertcatt]](https://github.com/yangxuntu/lxmertcatt).
- [2021 CVPR] **VirTex: Learning Visual Representations from Textual Annotations**, [[paper]](https://openaccess.thecvf.com/content/CVPR2021/papers/Desai_VirTex_Learning_Visual_Representations_From_Textual_Annotations_CVPR_2021_paper.pdf), [[bibtex]](/Bibtex/VirTex%20-%20Learning%20Visual%20Representations%20from%20Textual%20Annotations.bib), [[homepage]](http://kdexd.xyz/virtex/), sources: [[kdexd/virtex]](https://github.com/kdexd/virtex).
- [2021 TKDD] **DiMBERT: Learning Vision-Language Grounded Representations with Disentangled Multimodal-Attention**, [[paper]](/Documents/Papers/DiMBERT%20-%20Learning%20Vision-Language%20Grounded%20Representations%20with%20Disentangled%20Multimodal-Attention.pdf), [[bibtex]](/Bibtex/DiMBERT%20-%20Learning%20Vision-Language%20Grounded%20Representations%20with%20Disentangled%20Multimodal-Attention.bib).
- [2021 ICML] **CLIP: Learning Transferable Visual Models From Natural Language Supervision**, [[paper]](https://cdn.openai.com/papers/Learning_Transferable_Visual_Models_From_Natural_Language_Supervision.pdf), [[bibtex]](/Bibtex/Learning%20Transferable%20Visual%20Models%20From%20Natural%20Language%20Supervision.bib), [[slides]](https://icml.cc/media/icml-2021/Slides/9193.pdf), sources: [[openai/CLIP]](https://github.com/openai/CLIP).
- [2021 NeurIPS] **Align before Fuse: Vision and Language Representation Learning with Momentum Distillation**, [[paper]](https://arxiv.org/pdf/2107.07651.pdf), [[bibtex]](/Bibtex/Align%20before%20Fuse%20-%20Vision%20and%20Language%20Representation%20Learning%20with%20Momentum%20Distillation.bib), sources: [[salesforce/ALBEF]](https://github.com/salesforce/ALBEF).
Expand Down
Loading

0 comments on commit dd1ff77

Please sign in to comment.