Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhang Hao authored and Zhang Hao committed Mar 28, 2022
1 parent e1ae970 commit dd1ff77
Show file tree
Hide file tree
Showing 21 changed files with 117 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@article{woo2022explore,
  title         = {Explore and Match: End-to-End Video Grounding with Transformer},
  author        = {Woo, Sangmin and Park, Jinyoung and Koo, Inyong and Lee, Sumin and Jeong, Minki and Kim, Changick},
  journal       = {arXiv preprint arXiv:2201.10168},
  eprint        = {2201.10168},
  archiveprefix = {arXiv},
  year          = {2022},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@inproceedings{wang2021finegrained,
  title     = {Fine-grained Semantic Alignment Network for Weakly Supervised Temporal Language Grounding},
  author    = {Wang, Yuechen and Zhou, Wengang and Li, Houqiang},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  month     = nov,
  year      = {2021},
  address   = {Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.findings-emnlp.9},
  doi       = {10.18653/v1/2021.findings-emnlp.9},
  pages     = {89--99},
}
6 changes: 6 additions & 0 deletions Bibtex/GFlowNet Foundations.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@article{bengio2021gflownet,
  title         = {{GFlowNet} Foundations},
  author        = {Bengio, Yoshua and Deleu, Tristan and Hu, Edward J. and Lahlou, Salem and Tiwari, Mo and Bengio, Emmanuel},
  journal       = {arXiv preprint arXiv:2111.09266},
  eprint        = {2111.09266},
  archiveprefix = {arXiv},
  year          = {2021},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@inproceedings{li2021improving,
  title     = {Improving the Efficiency and Effectiveness for {BERT}-based Entity Resolution},
  author    = {Li, Bing and Miao, Yukai and Wang, Yaoshu and Sun, Yifang and Wang, Wei},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {35},
  number    = {15},
  pages     = {13226--13233},
  month     = may,
  year      = {2021},
  url       = {https://ojs.aaai.org/index.php/AAAI/article/view/17562},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@inproceedings{sariyildiz2020learning,
  author       = {Sariyildiz, Mert Bulent and Perez, Julien and Larlus, Diane},
  title        = {Learning visual representations with caption annotations},
  booktitle    = {European Conference on Computer Vision},
  year         = {2020},
  pages        = {153--170},
  organization = {Springer},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@article{soldan2021mad,
  title         = {{MAD}: A Scalable Dataset for Language Grounding in Videos from Movie Audio Descriptions},
  author        = {Soldan, Mattia and Pardo, Alejandro and Alc{\'a}zar, Juan Le{\'o}n and Heilbron, Fabian Caba and Zhao, Chen and Giancola, Silvio and Ghanem, Bernard},
  journal       = {arXiv preprint arXiv:2112.00431},
  eprint        = {2112.00431},
  archiveprefix = {arXiv},
  year          = {2021},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@inproceedings{zhang2021rstnet,
  author    = {Zhang, Xuying and Sun, Xiaoshuai and Luo, Yunpeng and Ji, Jiayi and Zhou, Yiyi and Wu, Yongjian and Huang, Feiyue and Ji, Rongrong},
  title     = {{RSTNet}: Captioning With Adaptive Attention on Visual and Non-Visual Words},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2021},
  pages     = {15465--15474},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@article{jia2022stcmnet,
  title   = {{STCM-Net}: A symmetrical one-stage network for temporal language localization in videos},
  author  = {Jia, Zixi and Dong, Minglin and Ru, Jingyu and Xue, Lele and Yang, Sikai and Li, Chunbo},
  journal = {Neurocomputing},
  volume  = {471},
  pages   = {194--207},
  year    = {2022},
  issn    = {0925-2312},
  doi     = {10.1016/j.neucom.2021.11.019},
  url     = {https://www.sciencedirect.com/science/article/pii/S0925231221016945},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@inproceedings{liu2022unsupervised,
  author    = {Liu, Daizong and Qu, Xiaoye and Wang, Yinzhen and Di, Xing and Zou, Kai and Cheng, Yu and Xu, Zichuan and Zhou, Pan},
  title     = {Unsupervised Temporal Video Grounding with Deep Semantic Clustering},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2022},
}
9 changes: 4 additions & 5 deletions Bibtex/VIVO.bib
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
@article{hu2020vivo,
title={VIVO: Surpassing Human Performance in Novel Object Captioning with Visual Vocabulary Pre-Training},
@inproceedings{hu2021vivo,
title={VIVO: Visual Vocabulary Pre-Training for Novel Object Captioning},
author={Hu, Xiaowei and Yin, Xi and Lin, Kevin and Wang, Lijuan and Zhang, Lei and Gao, Jianfeng and Liu, Zicheng},
journal={arXiv preprint arXiv:2009.13682},
year={2020},
url={https://arxiv.org/pdf/2009.13682.pdf}
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
year={2021},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@inproceedings{desai2021virtex,
  title     = {{VirTex}: Learning Visual Representations from Textual Annotations},
  author    = {Desai, Karan and Johnson, Justin},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition},
  pages     = {11162--11173},
  year      = {2021},
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@inproceedings{wang2021visual,
  author    = {Wang, Zheng and Chen, Jingjing and Jiang, Yu-Gang},
  title     = {Visual Co-Occurrence Alignment Learning for Weakly-Supervised Video Moment Retrieval},
  year      = {2021},
  isbn      = {9781450386517},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3474085.3475278},
  url       = {https://doi.org/10.1145/3474085.3475278},
  booktitle = {Proceedings of the 29th ACM International Conference on Multimedia},
  pages     = {1459--1468},
  numpages  = {10},
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
2. [Char/Word Embeddings, Sentence Representation and Natural Language Understanding](/readme/nlp/emb_sent_nlu.md)
3. [Machine Translation and Language Transfer, Generation, Summarization and Dialogue](/readme/nlp/machine_translation.md)
4. [Machine Comprehension and Question Answering](/readme/nlp/machine_comprehension.md)
5. [Sequence Labeling (POS, NER, SRL, RE, IE, IR, Parsing, EL, Coref, Discourse, etc)](/readme/nlp/sequence_labeling.md)
5. [Sequence Labeling and Information Retrieval (POS, NER, SRL, RE, IE, IR, Parsing, EL, Coref, Discourse, etc)](/readme/nlp/sequence_labeling.md)
6. [Sentiment Analysis and Text Classification](/readme/nlp/classification.md)
7. [Interpretability and Disambiguation](/readme/nlp/interpretability.md)
8. [Other NLP Research Works](/readme/nlp/others.md)
Expand Down
3 changes: 2 additions & 1 deletion readme/GAN_AE_NF.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@
- [2017 ICLR] **Density Estimation using Real NVP**, [[paper]](https://openreview.net/pdf?id=HkpbnH9lx), [[bibtex]](/Bibtex/Density%20Estimation%20using%20Real%20NVP.bib), sources: [[chrischute/real-nvp]](https://github.com/chrischute/real-nvp), [[xqding/RealNVP]](https://github.com/xqding/RealNVP).
- [2017 NeurIPS] **The Reversible Residual Network: Backpropagation Without Storing Activations**, [[paper]](https://papers.nips.cc/paper/2017/file/f9be311e65d81a9ad8150a60844bb94c-Paper.pdf), [[bibtex]](/Bibtex/The%20Reversible%20Residual%20Network.bib), sources: [[renmengye/revnet-public]](https://github.com/renmengye/revnet-public).
- [2018 NeurIPS] **Glow: Generative Flow with Invertible 1×1 Convolutions**, [[paper]](https://papers.nips.cc/paper/2018/file/d139db6a236200b21cc7f752979132d0-Paper.pdf), [[bibtex]](/Bibtex/Glow.bib), sources: [[openai/glow]](https://github.com/openai/glow), [[chaiyujin/glow-pytorch]](https://github.com/chaiyujin/glow-pytorch), [[rosinality/glow-pytorch]](https://github.com/rosinality/glow-pytorch), [[samuelmat19/GLOW-tf2]](https://github.com/samuelmat19/GLOW-tf2).
- [2020 TPAMI] **Normalizing Flows: An Introduction and Review of Current Methods**, [[paper]](https://arxiv.org/pdf/1908.09257v4.pdf), [[ArXiv v1]](https://arxiv.org/pdf/1908.09257v1.pdf), [[bibtex]](/Bibtex/Normalizing%20Flows.bib).
- [2020 TPAMI] **Normalizing Flows: An Introduction and Review of Current Methods**, [[paper]](https://arxiv.org/pdf/1908.09257v4.pdf), [[ArXiv v1]](https://arxiv.org/pdf/1908.09257v1.pdf), [[bibtex]](/Bibtex/Normalizing%20Flows.bib).
- [2021 ArXiv] **GFlowNet Foundations**, [[paper]](https://arxiv.org/pdf/2111.09266.pdf), [[bibtex]](/Bibtex/GFlowNet%20Foundations.bib).
1 change: 1 addition & 0 deletions readme/grounding/image/retrieval_captioning.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- [2020 ECCV] **Finding It at Another Side: A Viewpoint-Adapted Matching Encoder for Change Captioning**, [[paper]](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123590562.pdf), [[bibtex]](/Bibtex/Finding%20It%20at%20Another%20Side%20-%20A%20Viewpoint-Adapted%20Matching%20Encoder%20for%20Change%20Captioning.bib).
- [2020 TPAMI] **Auto-encoding and Distilling Scene Graphs for Image Captioning**, [[paper]](/Documents/Papers/Auto-encoding%20and%20Distilling%20Scene%20Graphs%20for%20Image%20Captioning.pdf), [[bibtex]](/Bibtex/Auto-encoding%20and%20Distilling%20Scene%20Graphs%20for%20Image%20Captioning.bib), sources: [[yangxuntu/SGAE]](https://github.com/yangxuntu/SGAE).
- [2021 CVPR] **Causal Attention for Vision-Language Tasks**, [[paper]](https://openaccess.thecvf.com/content/CVPR2021/papers/Yang_Causal_Attention_for_Vision-Language_Tasks_CVPR_2021_paper.pdf), [[bibtex]](/Bibtex/Causal%20Attention%20for%20Vision-Language%20Tasks.bib), [[supplementary]](https://openaccess.thecvf.com/content/CVPR2021/supplemental/Yang_Causal_Attention_for_CVPR_2021_supplemental.pdf), sources: [[yangxuntu/lxmertcatt]](https://github.com/yangxuntu/lxmertcatt).
- [2021 CVPR] **RSTNet: Captioning with Adaptive Attention on Visual and Non-Visual Words**, [[paper]](https://openaccess.thecvf.com/content/CVPR2021/papers/Zhang_RSTNet_Captioning_With_Adaptive_Attention_on_Visual_and_Non-Visual_Words_CVPR_2021_paper.pdf), [[bibtex]](/Bibtex/RSTNet%20-%20Captioning%20with%20Adaptive%20Attention%20on%20Visual%20and%20Non-Visual%20Words.bib), sources: [[zhangxuying1004/RSTNet]](https://github.com/zhangxuying1004/RSTNet).

## Text-based Image Edit
- [2019 ACL] **Expressing Visual Relationships via Language**, [[paper]](https://www.aclweb.org/anthology/P19-1182.pdf), [[bibtex]](/Bibtex/Expressing%20Visual%20Relationships%20via%20Language.bib), sources: [[airsplay/VisualRelationships]](https://github.com/airsplay/VisualRelationships).
6 changes: 4 additions & 2 deletions readme/grounding/image/vision_language.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
- [2020 ICLR] **VL-BERT: Pre-training of Generic Visual-Linguistic Representations**, [[paper]](https://openreview.net/pdf?id=SygXPaEYvH), [[bibtex]](/Bibtex/VL-BERT%20-%20Pre-training%20of%20Generic%20Visual-Linguistic%20Representations.bib), sources: [[jackroos/VL-BERT]](https://github.com/jackroos/VL-BERT).
- [2020 ICLR] **Variational Hetero-Encoder Randomized GANs for Joint Image-Text Modeling**, [[paper]](https://openreview.net/pdf?id=H1x5wRVtvS), [[bibtex]](/Bibtex/Variational%20Hetero-Encoder%20Randomized%20GANs%20for%20Joint%20Image-Text%20Modeling.bib).
- [2020 ECCV] **Oscar: Object-Semantics Aligned Pre-training for Vision-Language Tasks**, [[paper]](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123750120.pdf), [[bibtex]](/Bibtex/Oscar.bib), sources: [[microsoft/Oscar]](https://github.com/microsoft/Oscar).
- [2020 ECCV] **Learning Visual Representations with Caption Annotations**, [[paper]](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123530154.pdf), [[bibtex]](/Bibtex/Learning%20Visual%20Representations%20with%20Caption%20Annotations.bib), [[homepage]](https://europe.naverlabs.com/research/computer-vision/icmlm/).
- [2020 ArXiv] **Pixel-BERT: Aligning Image Pixels with Text by Deep Multi-Modal Transformers**, [[paper]](https://arxiv.org/pdf/2004.00849.pdf), [[bibtex]](/Bibtex/Pixel-BERT.bib).
- [2020 ArXiv] **VIVO: Surpassing Human Performance in Novel Object Captioning with Visual Vocabulary Pre-Training**, [[paper]](https://arxiv.org/pdf/2009.13682.pdf), [[bibtex]](/Bibtex/VIVO.bib).
- [2020 ArXiv] **ImageBERT: Cross-modal Pre-training with Large-scale Weak-supervised Image-Text Data**, [[paper]](https://arxiv.org/pdf/2001.07966.pdf), [[bibtex]](/Bibtex/ImageBERT%20-%20Cross-modal%20Pre-training%20with%20Large-scale%20Weak-supervised%20Image-Text%20Data.bib).
- [2020 ArXiv] **Contrastive Learning of Medical Visual Representations from Paired Images and Text**, [[paper]](https://arxiv.org/pdf/2010.00747.pdf), [[bibtex]](/Bibtex/Contrastive%20Learning%20of%20Medical%20Visual%20Representations%20from%20Paired%20Images%20and%20Text.bib).
- [2020 ArXiv] **Contrastive Learning of Medical Visual Representations from Paired Images and Text**, [[paper]](https://arxiv.org/pdf/2010.00747.pdf), [[bibtex]](/Bibtex/Contrastive%20Learning%20of%20Medical%20Visual%20Representations%20from%20Paired%20Images%20and%20Text.bib), sources: [[edreisMD/ConVIRT-pytorch]](https://github.com/edreisMD/ConVIRT-pytorch).
- [2021 ArXiv] **SemVLP: Vision-Language Pre-training by Aligning Semantics at Multiple Levels**, [[paper]](https://arxiv.org/pdf/2103.07829.pdf), [[bibtex]](/Bibtex/SemVLP%20-%20Vision-Language%20Pre-training%20by%20Aligning%20Semantics%20at%20Multiple%20Levels.bib).
- [2021 AAAI] **VIVO: Visual Vocabulary Pre-Training for Novel Object Captioning**, [[paper]](https://arxiv.org/pdf/2009.13682.pdf), [[bibtex]](/Bibtex/VIVO.bib).
- [2021 CVPR] **Causal Attention for Vision-Language Tasks**, [[paper]](https://openaccess.thecvf.com/content/CVPR2021/papers/Yang_Causal_Attention_for_Vision-Language_Tasks_CVPR_2021_paper.pdf), [[bibtex]](/Bibtex/Causal%20Attention%20for%20Vision-Language%20Tasks.bib), [[supplementary]](https://openaccess.thecvf.com/content/CVPR2021/supplemental/Yang_Causal_Attention_for_CVPR_2021_supplemental.pdf), sources: [[yangxuntu/lxmertcatt]](https://github.com/yangxuntu/lxmertcatt).
- [2021 CVPR] **VirTex: Learning Visual Representations from Textual Annotations**, [[paper]](https://openaccess.thecvf.com/content/CVPR2021/papers/Desai_VirTex_Learning_Visual_Representations_From_Textual_Annotations_CVPR_2021_paper.pdf), [[bibtex]](/Bibtex/VirTex%20-%20Learning%20Visual%20Representations%20from%20Textual%20Annotations.bib), [[homepage]](http://kdexd.xyz/virtex/), sources: [[kdexd/virtex]](https://github.com/kdexd/virtex).
- [2021 TKDD] **DiMBERT: Learning Vision-Language Grounded Representations with Disentangled Multimodal-Attention**, [[paper]](/Documents/Papers/DiMBERT%20-%20Learning%20Vision-Language%20Grounded%20Representations%20with%20Disentangled%20Multimodal-Attention.pdf), [[bibtex]](/Bibtex/DiMBERT%20-%20Learning%20Vision-Language%20Grounded%20Representations%20with%20Disentangled%20Multimodal-Attention.bib).
- [2021 ICML] **CLIP: Learning Transferable Visual Models From Natural Language Supervision**, [[paper]](https://cdn.openai.com/papers/Learning_Transferable_Visual_Models_From_Natural_Language_Supervision.pdf), [[bibtex]](/Bibtex/Learning%20Transferable%20Visual%20Models%20From%20Natural%20Language%20Supervision.bib), [[slides]](https://icml.cc/media/icml-2021/Slides/9193.pdf), sources: [[openai/CLIP]](https://github.com/openai/CLIP).
- [2021 NeurIPS] **Align before Fuse: Vision and Language Representation Learning with Momentum Distillation**, [[paper]](https://arxiv.org/pdf/2107.07651.pdf), [[bibtex]](/Bibtex/Align%20before%20Fuse%20-%20Vision%20and%20Language%20Representation%20Learning%20with%20Momentum%20Distillation.bib), sources: [[salesforce/ALBEF]](https://github.com/salesforce/ALBEF).
Expand Down
Loading

0 comments on commit dd1ff77

Please sign in to comment.