Semi-Supervised Learning
Semi-Supervised Learning
Maziar Raissi
Assistant Professor
<latexit sha1_base64="USyVVUP1NTUKfRIMnttD99743OA=">AAACL3icbVC7TgMxEPTxDO8AJY1FhESTcBckoIygoQwSgUghQnu+vcTCZ59sH1IU5UP4DL6AFr4A0SAaCv4CX3IFEKYaze5odidMBTfW99+8mdm5+YXF0tLyyura+kZ5c+vKqEwzbDEllG6HYFBwiS3LrcB2qhGSUOB1eHeWz6/vURuu5KUdpNhNoCd5zBlYJ92WD6tVqtFZDEo71qhA0JLLHgUZHShNBYQoaKpVCr3CVfFr/hh0mgQFqZACzdvy502kWJa4CCbAmE7gp7Y7BG05EzhavskMpsDuoIcdRyUkaLrD8XMjuueUiMbukFhJS8fqT8cQEmMGSeg2E7B983eWi//NOpmNT7pDLtPMomSToDgT1CqaN0UjrpFZMXAEmObuVsr6oIFZ1+evlMjkp41cL8HfFqbJVb0WHNXqF/VK47RoqER2yC7ZJwE5Jg1yTpqkRRh5IE/kmbx4j96r9+59TFZnvMKzTX7B+/oGw0iqPw==</latexit>
→ labeled dataset
<latexit sha1_base64="XjVonP3p0I5E2o3Vunxr8nNHdOU=">AAACH3icbVBJSgNBFK12Nk6tLt0UBsFV6FZRl0E3LhVMIiRN+F39kxRWD1T9VkOTvdfwAm71Bu7EbS7gOawMC6cHBY/3/lQvzJQ05HlDZ2Z2bn5hcWm5tLK6tr7hbm7VTZprgTWRqlTfhGBQyQRrJEnhTaYR4lBhI7w9H/mNO9RGpsk19TMMYugmsiMFkJXa7m5Ly26PQOv0nrcIH6hQEKLCiEdAdjAN2m7Zq3hj8L/En5Iym+Ky7X62olTkMSYkFBjT9L2MggI0SaFwUGrlBjMQt9DFpqUJxGiCYvyXAd+zSsQ7qbYvIT5Wv3cUEBvTj0NbGQP1zG9vJP7nNXPqnAaFTLKcMBGTRZ1ccUr5KBgeSY2CVN8SEFraW7nogQZBNr4fWyIzOm1QssH4v2P4S+oHFf+4cnh1VK6eTSNaYjtsl+0zn52wKrtgl6zGBHtkz+yFvTpPzpvz7nxMSmecac82+wFn+AU+8KQx</latexit>
→ unlabeled dataset
<latexit sha1_base64="WX3/j1Ls4TL2QAWMVZvgfFmc5mE=">AAACIXicbVC7SgNBFJ31bXxFLW0Go2AVdlXUMmhjqWCikIRwd/YmGZydXWbuqmHJD/gb/oCt/oGd2Im93+HkUfg6MHA4577mhKmSlnz/3ZuYnJqemZ2bLywsLi2vFFfXajbJjMCqSFRirkKwqKTGKklSeJUahDhUeBlenwz8yxs0Vib6gnopNmPoaNmWAshJreJWw8hOl8CY5JY3CO8oz7SCEBVGPAJyo6nfKpb8sj8E/0uCMSmxMc5axc9GlIgsRk1CgbX1wE+pmYMhKRT2C43MYgriGjpYd1RDjLaZD3/T59tOiXg7Me5p4kP1e0cOsbW9OHSVMVDX/vYG4n9ePaP2UTOXOs0ItRgtameKU8IH0fBIGhSkeo6AMNLdykUXDAhyAf7YEtnBaf2CCyb4HcNfUtstBwflvfP9UuV4HNEc22CbbIcF7JBV2Ck7Y1Um2D17ZE/s2XvwXrxX721UOuGNe9bZD3gfXxDxpSg=</latexit>
Adversarial Training
<latexit sha1_base64="nv2tOMZFUebmxB3CejJlp3054qE=">AAACHHicbVDLSsNAFJ3UV62vqksXDhbBVUkqqMuqG5cV2lpoQ7mZTNqhk0mYmQgldOlv+ANu9Q/ciVvBH/A7nLRZ2NYDA4dz7p17OF7MmdK2/W0VVlbX1jeKm6Wt7Z3dvfL+QVtFiSS0RSIeyY4HinImaEszzWknlhRCj9MHb3Sb+Q+PVCoWiaYex9QNYSBYwAhoI/XLx73pH6mk/uTazyZBMuC4KYEJJgb9csWu2lPgZeLkpIJyNPrln54fkSSkQhMOSnUdO9ZuClIzwumk1EsUjYGMYEC7hgoIqXLTaYgJPjWKj4NImic0nqp/N1IIlRqHnpkMQQ/VopeJ/3ndRAdXbspEnGgqyOxQkHCsI5y1gn0mKdF8bAgQyUxWTIYggWjTyNwVX2XRJiVTjLNYwzJp16rORfX8vlap3+QVFdEROkFnyEGXqI7uUAO1EEFP6AW9ojfr2Xq3PqzP2WjByncO0Rysr1+9gKLc</latexit>
Reducing the LDS (local distributional smoothness) loss would make the model smooth at each data point.
<latexit sha1_base64="Qkkr92Jle5Ax8lZdZomMBSDeFmw=">AAACO3icbZA9TxtBEIb3ICTEkMQJJc0oFhKVdUckSBWhJAUFBZAYkIxlze2NuZX347Q7B7Is/5v8Df4AbdJQI1GgtPS5My7Cxyut9OidGc3smxZaBY7jq2hu/sXCy1eLrxtLy2/evmu+/3AYXOkldaTTzh+nGEgrSx1WrOm48IQm1XSUDr/V9aMz8kE5+5NHBfUMnlo1UBK5svrNLweUlbJm2P3+A85dqTMwOCTgnMC4jDQE4xzngAyEMocMGaFwynK70W+24nY8FTyFZAYtMdNev3lzkjlZGrIsNYbQTeKCe2P0rKSmSeOkDFSgHOIpdSu0aCj0xtN/TmCtcjIYOF89yzB1/58YowlhZNKq0yDn4XGtNp+rdUsefO6NlS1KJivvFw1KDeygDg0y5UmyHlWA0qvqVpA5epRcRftgSxbq0yZ1MMnjGJ7C4UY72Wx/2t9obX+dRbQoVsVHsS4SsSW2xY7YEx0hxS9xKX6LP9FFdB3dRn/vW+ei2cyKeKDo7h87nq2B</latexit>
→ cross entropy
<latexit sha1_base64="qCZrjboG+BNRgEZ9uVoGwxMyiA0=">AAACHXicbVC7TsMwFHXKq5RXgJHFokJiqhJAwFjBwlgk+pCaqHIcp7XqxJF9A1RRV36DH2CFP2BDrIgf4DtwHwNtOZKlo3PuyydIBdfgON9WYWl5ZXWtuF7a2Nza3rF39xpaZoqyOpVCqlZANBM8YXXgIFgrVYzEgWDNoH898pv3TGkukzsYpMyPSTfhEacEjNSxsad4twdEKfmAPWCPkFMltcYsASXTwbBjl52KMwZeJO6UlNEUtY7944WSZrEZQAXRuu06Kfg5UcCpYMOSl2mWEtonXdY2NCEx034+/skQHxklxJFU5iWAx+rfjpzEWg/iwFTGBHp63huJ/3ntDKJLP+dJmgFL6GRRlAkMEo9iwSFXjIIYGEKo4uZWTHtEEQomvJktoR6dNiyZYNz5GBZJ46TinldOb8/K1atpREV0gA7RMXLRBaqiG1RDdUTRE3pBr+jNerberQ/rc1JasKY9+2gG1tcvI3ajqQ==</latexit>
for L2 norm
<latexit sha1_base64="CpFtbwwpEDq07NbTMU+bsYOV6BE=">AAACEnicbVA7TsNAFFzzDeEXQFQ0KxIkqsgOCCgjaCgogkQ+UmJZ6806WWW9a+0+IyIrt+ACtHADOkTLBbgA58BOUpCEqUYz8/RG40eCG7Dtb2tpeWV1bT23kd/c2t7ZLeztN4yKNWV1qoTSLZ8YJrhkdeAgWCvSjIS+YE1/cJP5zUemDVfyAYYRc0PSkzzglEAqeYXDDrAnSAKlcenOq5SwVDoceYWiXbbHwIvEmZIimqLmFX46XUXjkEmgghjTduwI3IRo4FSwUb4TGxYROiA91k6pJCEzbjKuP8InqdLFWYVAScBj9e9FQkJjhqGfJkMCfTPvZeJ/XjuG4MpNuIxiYJJOHgWxwKBwtgXucs0oiGFKCNU87Yppn2hCIV1s5kvXZNVG+XQYZ36GRdKolJ2L8tn9ebF6PZ0oh47QMTpFDrpEVXSLaqiOKErQC3pFb9az9W59WJ+T6JI1vTlAM7C+fgHuz52D</latexit>
$D\left[\, p(y \mid x_*, \hat{\theta}),\; p(y \mid x_* + r, \theta) \,\right]$
<latexit sha1_base64="8OYyQTWFFPvhvEZjkMltArsN8/Y=">AAACP3icbVDLSgMxFM3Ud31VXboJFqHVUmZUVARB1IXLClYL7VAyadqGZh4kd8Rh7P/4G/6AW/UHdCdu3Zl2BrHVA4HDOedyb44TCK7ANF+NzMTk1PTM7Fx2fmFxaTm3snqt/FBSVqW+8GXNIYoJ7rEqcBCsFkhGXEewG6d3NvBvbplU3PeuIAqY7ZKOx9ucEtBSM3d6XpAlfNfcKuEGdBmQIj46xuf1oBDdJ2qXQJxY/WIJp/q2/MnbzVzeLJtD4L/ESkkepag0c2+Nlk9Dl3lABVGqbpkB2DGRwKlg/WwjVCwgtEc6rK6pR1ym7Hj41z7e1EoLt32pnwd4qP6eiImrVOQ6OukS6KpxbyD+59VDaB/aMfeCEJhHk0XtUGDw8aA43OKSURCRJoRKrm/FtEskoaDrHdnSUoPT+lldjDVew19yvVO29su7l3v5k9O0olm0jjZQAVnoAJ2gC1RBVUTRA3pCz+jFeDTejQ/jM4lmjHRmDY3A+PoGkF+s7Q==</latexit>
$D(r, x_*, \theta)$
ˆ Hessian matrix
<latexit sha1_base64="jHuPQnS42gYVChIFRmqrHlMdOjM=">AAACCXicbVDLSsNAFL2pr1pfVZduBovgqiQV1GXRTZcV7APaWCaTSTt0MgkzE7GEfoE/4Fb/wJ249Sv8Ab/DSZuFbT1w4XDOvdzD8WLOlLbtb6uwtr6xuVXcLu3s7u0flA+P2ipKJKEtEvFIdj2sKGeCtjTTnHZjSXHocdrxxreZ33mkUrFI3OtJTN0QDwULGMHaSA8NqhTDAoVYS/Y0KFfsqj0DWiVOTiqQozko//T9iCQhFZpwrFTPsWPtplhqRjidlvqJojEmYzykPUMFDqly01nqKTozio+CSJoRGs3UvxcpDpWahJ7ZNPFGatnLxP+8XqKDazdlIk40FWT+KEg40hHKKkA+k5RoPjEEE8lMVkRGWGKiTVELX3yVRZuWTDHOcg2rpF2rOpfVi7tapX6TV1SEEziFc3DgCurQgCa0gICEF3iFN+vZerc+rM/5asHKb45hAdbXLxL6mwk=</latexit>
<latexit sha1_base64="/o6EV7PJQ1Ftnha4/cpzXc5ODek=">AAACP3icbVDLSgNBEJz1bXxFPXoZTQQFCbsK6lH04lHFqJCE0DvpuIOzD2d61bDmf/wNf8Cr+gN6E6/enI05+GoYKKqru2vKT5Q05LrPzsDg0PDI6Nh4YWJyanqmODt3YuJUC6yKWMX6zAeDSkZYJUkKzxKNEPoKT/2Lvbx/eoXayDg6pk6CjRDOI9mWAshSzeLuESYKBPLy5Urn9ma1zK8lBVyS4SLVGiPiaEiGQFaS5JI1Xg+AsjoFSNBdLS82iyW34vaK/wVeH5RYvw6axZd6KxZpaJcLBcbUPDehRgaapFDYLdRTgwmICzjHmoURhGgaWe+vXb5smRZvx9o+a67Hfp/IIDSmE/pWaU0H5ncvJ//r1VJqbzcyGSUpYSS+DrVTxSnmeXC8JTUKUh0LQGhpvXIRgAZBNt4fV1omt9Yt2GC83zH8BSfrFW+zsnG4XtrZ7Uc0xhbYElthHttiO2yfHbAqE+yOPbBH9uTcO6/Om/P+JR1w+jPz7Ec5H59KT68G</latexit>
$\xi = 10^{-6}$, $d$ is a randomly sampled unit vector
<latexit sha1_base64="vApTuqdlrtscS+LwCyy239DU06Q=">AAACNHicbVDLSgNBEJz1bXxFPXppTAQPGnYV1IsgevGoYFRIYuidndXBeSwzs2JY8in+hj/gVT9A8CZ69BucxBx81amo6qa6K84Ety4Mn4Oh4ZHRsfGJydLU9MzsXHl+4dTq3FBWp1pocx6jZYIrVnfcCXaeGYYyFuwsvj7o+Wc3zFiu1YnrZKwl8VLxlFN0XmqXt6vNWw67EIUXxfpWt7oG1aQK3AKCQZVoKTpgUWaCJZAr7uCGUacNtMuVsBb2AX9JNCAVMsBRu/zeTDTNJVOOCrS2EYWZaxVoHKeCdUvN3LIM6TVesoanCiWzraL/YBdWvJJA6nNTrRz01e8bBUprOzL2kxLdlf3t9cT/vEbu0p1WwVWWO6boV1CaC3Aaem1Bwo3/13eQcKSG+1uBXqFB6nynP1IS2zutW/LFRL9r+EtON2rRVm3zeKOytz+oaIIskWWySiKyTfbIITkidULJHXkgj+QpuA9egtfg7Wt0KBjsLJIfCD4+Ab6gqSQ=</latexit>
Miyato, Takeru, et al. "Virtual adversarial training: a regularization method for supervised and semi-supervised
virtual adversarial perturbation
<latexit sha1_base64="Qh2osFOYCdPsGwJK8D6D6K5gqp0=">AAACHXicbVBLSgNBFOzxG+Mv6tJNYxBchZkI6jLoxmUE84FkCG963iRNej509wRCyNZreAG3egN34la8gOewJ5mFSSxoKKrq8V6XlwiutG1/W2vrG5tb24Wd4u7e/sFh6ei4qeJUMmywWMSy7YFCwSNsaK4FthOJEHoCW97wLvNbI5SKx9GjHifohtCPeMAZaCP1SnTEpU5BUPCzGEhueIJGk14eKdsVewa6SpyclEmOeq/00/VjloYYaSZAqY5jJ9qdgNScCZwWu6nCBNgQ+tgxNIIQlTuZ/WRKz43i0yCW5kWaztS/ExMIlRqHnkmGoAdq2cvE/7xOqoMbd8KjJNUYsfmiIBVUxzSrhfpcItNibAgwyc2tlA1AAtOmlYUtvspOmxZNMc5yDaukWa04V5XLh2q5dptXVCCn5IxcEIdckxq5J3XSIIw8kRfySt6sZ+vd+rA+59E1K585IQuwvn4BsnGjYg==</latexit>
learning." IEEE transactions on pattern analysis and machine intelligence 41.8 (2018): 1979-1993.
Mean teachers are better role models:
Weight-averaged consistency targets improve semi-supervised deep learning results YouTube Video
$J$ → consistency loss
<latexit sha1_base64="Wn+jZ8R8q5C53IdTpeNspuCKW9g=">AAACInicbVDLSgNBEJyNrxhfqx69DIaAp7Croh6DXsRTBPOAJITZySQZMjuzzPSqYckX+Bv+gFf9A2/iSfDsdzib5GASGxqKqm6qu4JIcAOe9+VklpZXVtey67mNza3tHXd3r2pUrCmrUCWUrgfEMMElqwAHweqRZiQMBKsFg6tUr90zbbiSdzCMWCskPcm7nBKwVNst3OCm5r0+EK3VA24Ce4SEKmmsNZN0iIUyZtR2817RGxdeBP4U5NG0ym33p9lRNA6ZBCqIMQ3fi6CVEA2cCjbKNWPDIkIHpMcaFkoSMtNKxu+McMEyHdxV2rYEPGb/biQkNGYYBnYyJNA381pK/qc1YuhetBIuozj9bWLUjQUGhdNscIdrRkEMLSBUc3srpn2iCQWb4IxLx6SnjXI2GH8+hkVQPS76Z8WT29N86XIaURYdoEN0hHx0jkroGpVRBVH0hF7QK3pznp1358P5nIxmnOnOPpop5/sXsTSlfQ==</latexit>
– random translations and horizontal flips of the input images
– Gaussian noise on the input layer
– dropout applied within the network
Ramp up the scale of the consistency loss from zero to its final value
<latexit sha1_base64="OVjg5pOIgHr+je39135AsCa92BM=">AAACQHicbVDLSgNBEJz1bXxFPXppDIKnsKugHkUvHlWMCjGE3kmvGZzHMjMrxJAP8jf8Aa/6A+JNvHpyNubgq6ChqO6muivNpXA+jp+jsfGJyanpmdnK3PzC4lJ1eeXcmcJyanAjjb1M0ZEUmhpeeEmXuSVUqaSL9Oaw7F/cknXC6DPfy6ml8FqLTHD0QWpXD09R5VDk4LsEjqMkMBlwo13wJs17II1zkFmj4I6sAW9A+CAIjRJuURZUaVdrcT0eAv6SZERqbITjdvXlqmN4oUh7LtG5ZhLnvtVH6wWXNKhcFY5y5Dd4Tc1ANSpyrf7w2QFsBKUDmbGhtIeh+n2jj8q5nkrDpELfdb97pfhfr1n4bK/VFzovyse/jLJClh+XyUFHWOJe9gJBbkW4FXgXLXIf8v3h0nHlaYMymOR3DH/J+VY92alvn2zV9g9GEc2wNbbONlnCdtk+O2LHrME4u2eP7Ik9Rw/Ra/QWvX+NjkWjnVX2A9HHJ2OMsCo=</latexit>
Ramp up the scale of consistency loss from zero to its final value
<latexit sha1_base64="OVjg5pOIgHr+je39135AsCa92BM=">AAACQHicbVDLSgNBEJz1bXxFPXppDIKnsKugHkUvHlWMCjGE3kmvGZzHMjMrxJAP8jf8Aa/6A+JNvHpyNubgq6ChqO6muivNpXA+jp+jsfGJyanpmdnK3PzC4lJ1eeXcmcJyanAjjb1M0ZEUmhpeeEmXuSVUqaSL9Oaw7F/cknXC6DPfy6ml8FqLTHD0QWpXD09R5VDk4LsEjqMkMBlwo13wJs17II1zkFmj4I6sAW9A+CAIjRJuURZUaVdrcT0eAv6SZERqbITjdvXlqmN4oUh7LtG5ZhLnvtVH6wWXNKhcFY5y5Dd4Tc1ANSpyrf7w2QFsBKUDmbGhtIeh+n2jj8q5nkrDpELfdb97pfhfr1n4bK/VFzovyse/jLJClh+XyUFHWOJe9gJBbkW4FXgXLXIf8v3h0nHlaYMymOR3DH/J+VY92alvn2zV9g9GEc2wNbbONlnCdtk+O2LHrME4u2eP7Ik9Rw/Ra/QWvX+NjkWjnVX2A9HHJ2OMsCo=</latexit>
Tarvainen, Antti, and Harri Valpola. "Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results."
arXiv preprint arXiv:1703.01780 (2017).
MixMatch: A Holistic Approach
to Semi-Supervised Learning YouTube Playlist
$p_{\text{model}}(y \mid x; \theta)$ → a generic model which produces a distribution over class labels $y$ for an input $x$ with parameters $\theta$
<latexit sha1_base64="HN/WTGhv741eBcMt+TgeaVXhS2M=">AAACmHicbVHLjtMwFHXCayiPKbCDzRUNYthUCSBAYlPBAtgNiM6M1FSV49w01jhxZN/QRiFfwpfxA3wHTtoFM8NdHZ37OMfHSaWkpTD87fnXrt+4eevg9ujO3Xv3D8cPHp5YXRuBc6GVNmcJt6hkiXOSpPCsMsiLROFpcv6x75/+QGOlLr9TU+Gy4OtSZlJwctRq/KtaxYRbagudouqOmp/b9zHlSPwFxEauc+LG6A3shjissUQjBQzjsMmlyKEyOq0FWuCQOsdGJnV/HLQTBqG4taB4gspC0ASQaQO8BFlWNUGwDWAjyd3ghhdIzikEO/2gW40n4TQcCq6CaA8mbF/Hq/GfONWiLrCkQXYRhRUtW25ICoXdKK4tVlyc8zUuHCydol22Q4gdPHNMOrjLdEkwsP9utLywtikSN1lwyu3lXk/+r7eoKXu3bIf3Yil2QlmtgDT0P+IyMyhINQ5wYaTzCiJ3cYg+jQsqqe2tdSMXTHQ5hqvg5OU0ejN99fX1ZPZhH9EBe8KesiMWsbdsxj6zYzZnwvO8517oRf5jf+Z/8r/sRn1vv/OIXSj/21+f18vH</latexit>
<latexit sha1_base64="7wwclTPPfkBr2B1Nd6Sy7pu0qnM=">AAACF3icbVBLTgJBFOzBH+Jv1J1uOoKJKzKjRl0S3bjERD4JENLTPKBDzyfdb4xkQuI1vIBbvYE749alF/Ac9gALASvppFL1Xup1eZEUGh3n28osLa+srmXXcxubW9s79u5eVYex4lDhoQxV3WMapAigggIl1CMFzPck1LzBTerXHkBpEQb3OIyg5bNeILqCMzRS2z5oIjxiEjHFfEAzSAtN7AOywqht552iMwZdJO6U5MkU5bb90+yEPPYhQC6Z1g3XibCVMIWCSxjlmrGGiPEB60HD0MAk6lYy/sOIHhulQ7uhMi9AOlb/biTM13roe2bSZ9jX814q/uc1YuxetRIRRDFCwCdB3VhSDGlaCO0IBRzl0BDGlTC3Ut43dfC0jZmUjk5PG+VMMe58DYukelp0L4pnd+f50vW0oiw5JEfkhLjkkpTILSmTCuHkibyQV/JmPVvv1of1ORnNWNOdfTID6+sXGPigWw==</latexit>
Consistency Regularization
<latexit sha1_base64="lGcpTrxly2mD6OOFbIy56UTToZQ=">AAACInicbVDLTgIxFO3gC/GFunTTSEhckRlM1CWRjUs08kiAkE7nAg2dzqTtmIwTvsDf8Afc6h+4M65MXPsddmAWAp6kycm55z563JAzpW37y8qtrW9sbuW3Czu7e/sHxcOjlgoiSaFJAx7IjksUcCagqZnm0AklEN/l0HYn9bTefgCpWCDudRxC3ycjwYaMEm2kQbHcm81IJHjTeiCU2QiCxvgORhEnkj1mvpJdsWfAq8TJSAllaAyKPz0voJEPQlNOlOo6dqj7CZGaUQ7TQi9SEBI6ISPoGiqID6qfzE6Z4rJRPDwMpHlC45n6tyMhvlKx7xqnT/RYLddS8b9aN9LDq37CRBiln5wvGkYc6wCn2WCPSaCax4YQKpm5FdMxkYRqk+DCFk+lp00LJhhnOYZV0qpWnIvK+W21VLvOIsqjE3SKzpCDLlEN3aAGaiKKntALekVv1rP1bn1Yn3Nrzsp6jtECrO9fRwel0A==</latexit>
encourage the model to produce the same output distribution when its inputs are perturbed
<latexit sha1_base64="xiJlcTn8iP/j/kJhlqgslRJCOgo=">AAACXXicbZA7T8MwFIXd8C6vAgMDi0WFxFQlIAEjgoWxSLQgtVVxnFtq4diRfQ1UUf8e/4GJjYkVZpy0A68rWTo69177+IszKSyG4UslmJmdm19YXKour6yurdc2NttWO8OhxbXU5iZmFqRQ0EKBEm4yAyyNJVzH9+dF//oBjBVaXeEog17K7pQYCM7QW/3abRfhCXNQ3N/I7oDiEGiqE5AUNc2MThyfmJalQLXDzCFNfDAjYlfcQR+HoKhAS4XyPUuZAZqBQWdiSMb9Wj1shGXRvyKaijqZVrNfe+smmrsUFHLJrO1EYYa9nBkUXMK42nUWMsbvfdiOl8rHsr28JDGme95J6EAbfxTS0v2+kbPU2lEa+8mU4dD+7hXmf72Ow8FJLy9/6FlNHhq4ElKB1RMxwFGOvGDcCJ+V8iEzjKOH/+OVxBbRxlUPJvqN4a9oHzSio8bh5UH99GyKaJHskF2yTyJyTE7JBWmSFuHkmbyTD/JZeQ3mgpVgbTIaVKY7W+RHBdtfDzm57Q==</latexit>
<latexit sha1_base64="rPMsXiirYr4efHceLFdQNiIuKBU=">AAACR3icbZC7SgNBFIZn4z3eopY2g0HQJuwqqKVoI1YKRoUkhLOzJ2ZwdmaZOauGJe/ka/gCgpVWtnZi6SSm8HZg4Oc/1/niTElHYfgUlMbGJyanpmfKs3PzC4uVpeVzZ3IrsC6MMvYyBodKaqyTJIWXmUVIY4UX8fXhIH9xg9ZJo8+ol2ErhSstO1IAeatdOW4S3lEB3JERXXAkBScL2nWMTYc1fIO6yOnWcEKbOg6xuUEOFrk2xGWC2veA2uy3K9WwFg6D/xXRSFTZKE7alddmYkSe+glCgXONKMyoVYD1AxX2y83cYQbiGq6w4aWGFF2rGP65z9e9k3B/pX+a+ND93lFA6lwvjX2l/0fX/c4NzP9yjZw6e61C6iwn1OJrUSdXnDwAD5An0qIg1fMChJUDYB6cBeHx/NySuMFp/bIHE/3G8Fecb9Windr26VZ1/2CEaJqtsjW2wSK2y/bZETthdSbYPXtkz+wleAjegvfg46u0FIx6VtiPKAWfV96zMw==</latexit>
“Virtual Adversarial Training” (VAT) → computing an additive perturbation to apply to the input which maximally
<latexit sha1_base64="V6W6ffK9lJfPPU1woVBolUBeesI=">AAACrXicbZHNbtNAEMfX5quErwASFy4rItRyCTYg4MChBYE4FilJKyUhHa8n9qr7Ye2O20ZW3onX4QV4DtZODrRlpJX+mvnPzO5vs0pJT0nyO4pv3Lx1+87O3d69+w8ePuo/fjLxtnYCx8Iq644z8KikwTFJUnhcOQSdKTzKTr+09aMzdF5aM6JVhXMNhZFLKYBCatH/NSO8oObkZCId1aD4Qd7awcmgRw6kkabY3eV7k4PRqzWfOVmUBM7Zc77pFFZXNQUTB8MhzyXJM+QVhmku65ZwshyqSq1aQSVyaUIHPy+lKLmGC6lBhaIowRToO4etqbUIBd7zPFBwMqvbWeveoj9IhkkX/LpIt2LAtnG46P+Z5VbUGg1186ZpUtG8AUdSKFz3ZrXHCsQpFDgN0oBGP286smv+MmRyvrQuHBMu1Gb/7WhAe7/SWXBqoNJfrbXJ/9WmNS0/zpuOBBqxWbSsVYcofFN4tENBAUsuQbgAVbSAHAgKv3NpS+7bq3Vg0qsYrovJm2H6fvj2x7vB/uctoh32nL1geyxlH9g++84O2ZiJ6Fn0KfoafYtfx+N4Fv/cWONo2/OUXYq4+AsRYtWi</latexit>
changes the output class distribution
<latexit sha1_base64="LHeBiECt8Hk+7Fq0naoeYo05R5Y=">AAACOnicbVDLSgMxFM34tr6qLt0Ei+CqzKioG0F047KCVaEtJZO57QSTzJDc0ZahX+Nv+ANudeXWhSBu/QAzbRe+DgQO596be+4JUyks+v6LNzE5NT0zOzdfWlhcWl4pr65d2iQzHOo8kYm5DpkFKTTUUaCE69QAU6GEq/DmtKhf3YKxItEX2E+hpVhXi47gDJ3ULh81EXqY38WCx1SxnlBMyj7lMdNdsBRjoEmGaYaUS2YtjZwlI8KsmB6U2uWKX/WHoH9JMCYVMkatXX5rRgnPFGgc/tcI/BRbOTMouIRBqZlZSBm/YV1oOKqZAtvKh2cO6JZTItpJjHvaGSrU7xM5U9b2Veg6FcPY/q4V4n+1Roadw1YutDsTNB8t6mSSYkKLzNzRBji6WCLBuBHOaxGQYRxdsj+2RLawNgwm+B3DX3K5Uw32q7vne5Xjk3FEc2SDbJJtEpADckzOSI3UCSf35JE8kWfvwXv13r2PUeuEN55ZJz/gfX4BOEOvVw==</latexit>
Entropy Minimization
<latexit sha1_base64="0aXAwrsok8vfH1RpFsYnfSGXvco=">AAACHHicbVDLSsNAFJ3UV62vqksXDhbBVUkqqMuiCG6ECvYBbSiTyaQdOpmEmYkQQ5b+hj/gVv/AnbgV/AG/w0mahW09MHA45965h+OEjEplmt9GaWl5ZXWtvF7Z2Nza3qnu7nVkEAlM2jhggeg5SBJGOWkrqhjphYIg32Gk60yuMr/7QISkAb9XcUhsH4049ShGSkvD6uEg/yMRxE2vuRJBGMNbyqlPH4uJmlk3c8BFYhWkBgq0htWfgRvgyCdcYYak7FtmqOwECUUxI2llEEkSIjxBI9LXlCOfSDvJQ6TwWCsu9AKhH1cwV/9uJMiXMvYdPekjNZbzXib+5/Uj5V3YCeVhpAjH00NexKAKYNYKdKkgWLFYE4QF1VkhHiOBsNLdzVxxZRYtrehirPkaFkmnUbfO6qd3jVrzsqioDA7AETgBFjgHTXADWqANMHgCL+AVvBnPxrvxYXxOR0tGsbMPZmB8/QIjDaMb</latexit>
<latexit sha1_base64="oSsE/gUUvbn/U2LrL96yM5sx7hg=">AAACRXicbVA9bxNBEN0zkBiHJAZKmhVOpKSx7hwJkGgsaFIGCSeWbMua25uLV9mP0+5c5OPwX+Jv8AdoKKBMlw7RwtpxQRJe9fRmRu/NSwslPcXx96jx4OGjjc3m49bWk+2d3fbTZ6felk7gQFhl3TAFj0oaHJAkhcPCIehU4Vl68X45P7tE56U1H6kqcKLh3MhcCqAgTdvHWhqp5SfkNEOOhpwtKm5zvldMx4RzqrXNUC0Oqs/zt3wclggO93huHS+NghQVZjwDgmm7E3fjFfh9kqxJh61xMm1fjTMrSh08hQLvR0lc0KQGR1IoXLTGpccCxAWc4yhQAxr9pF59vOD7QclWKXJriK/Ufy9q0N5XOg2bGmjm786W4v9mo5LyN5NamqIkNOLGKC8VJ8uX9fFMOhSkqkBAOBmycjEDB4JCybdcMr+MtmiFYpK7Ndwnp71u8qp79KHX6b9bV9RkL9hLdsAS9pr12TE7YQMm2Bf2jf1gP6Ov0XX0K/p9s9qI1jfP2S1Ef/4CjeOywA==</latexit>
predictions on unlabeled data and using these as training targets in a standard cross-entropy loss
<latexit sha1_base64="BUwu90obAZZ/Esml/yUbYxND+EM=">AAACZnicbVFNbxMxEHWWrxKgBBDiwGVEhMSFaLcg4FjBhWORSFspiaJZe5JY9dorzywiWuU3cuYPIP4AV8Cb5kBbRrL0/N48z+i5rJ1lyfPvveza9Rs3b+3d7t+5e2///uDBw2MOTdQ01sGFeFoik7OexmLF0WkdCavS0Ul59qHTT75QZBv8Z1nXNKtw6e3CapREzQd2KvRV2uQxVncUQ/DQeIclOTJgUBDQG2jY+iXIipgAGSSi9VsG45KEwXpAYEmtGA3oGJhfkpcY6jW4dNnMB8N8lG8LroJiB4ZqV0fzwY+pCbqp0ivaIfOkyGuZtRjFakeb/rRhqlGf4ZImCXqsiGftNpINPE+MgUWI6XiBLfuvo8WKeV2VqbNCWfFlrSP/p00aWbybtdbXjZDX54MWjQMJ0OULxkbS4tYJoI427Qp6hRG1pF+4MMVwt9qmn4IpLsdwFRwfjIo3o1efXg8P3+8i2lNP1TP1QhXqrTpUH9WRGiutvqlf6rf60/uZ7WePsyfnrVlv53mkLlQGfwFgWL08</latexit>
Traditional Regularization
<latexit sha1_base64="93d7bR0k6r7eX8gIxrg92gWlJQk=">AAACInicbVDLSsNAFJ3Ud31VXboZLIKrklRQl0U3LlX6ENogN5ObdujkwcxEqCFf4G/4A271D9yJK8G13+GkzcLXgYHDOefOvRwvEVxp2363KnPzC4tLyyvV1bX1jc3a1nZXxalk2GGxiOW1BwoFj7CjuRZ4nUiE0BPY88Znhd+7Ral4HLX1JEE3hGHEA85AG+mmtj+Y/pFJ9PO2BJ8XMgh6hcNUgOR3Za5uN+wp6F/ilKROSlzc1D4HfszSECPNBCjVd+xEuxlIzZnAvDpIFSbAxjDEvqERhKjcbHpKTveN4tMgluZFmk7V7xMZhEpNQs8kQ9Aj9dsrxP+8fqqDEzfjUZJqjNhsUZAKqmNadEN9LpFpMTEEmDRVMMpGIIFp0+CPLb4qTsurphjndw1/SbfZcI4ah5fNeuu0rGiZ7JI9ckAcckxa5JxckA5h5J48kifybD1YL9ar9TaLVqxyZof8gPXxBSDvpbk=</latexit>
0
<latexit sha1_base64="S5BUfE/+tuxJ1McUG1d8zC4h6k8=">AAACN3icbVDLSgMxFM34tr6qLt0Eq+iqzKioCILoxqWCVaFTyp1M2oZmMkNyp7YM/Rd/wx9wq1tX7sStf2DazsKqBwKHcx/n5gSJFAZd982ZmJyanpmdmy8sLC4trxRX125NnGrGKyyWsb4PwHApFK+gQMnvE80hCiS/C9oXg/pdh2sjYnWDvYTXImgq0RAM0Er14omvRbOFoHX8QH3kXcw6oISUQCPRTROaGm7oli/tyhB26CnN6Va/Xiy5ZXcI+pd4OSmRHFf14ocfxiyNuEImwZiq5yZYy0CjYJL3C771SoC1ocmrliqIuKllwz/26bZVQtqItX0K6VD9OZFBZEwvCmxnBNgyv2sD8b9aNcXGcS0TKkmRKzYyaqSSYkwHgdFQaM5Q9iwBpoW9lbIWaGBoYx1zCc3gtH7BBuP9juEvud0re4fl/euD0tl5HtEc2SCbZJd45IickUtyRSqEkUfyTF7Iq/PkvDsfzueodcLJZ9bJGJyvb9ldrPI=</latexit>
$x'$ is closer to $x_1$ than to $x_2$
<latexit sha1_base64="OOEM7qGU+RzIGkbhV4qcsVsdRtA=">AAACHHicbVDLSsNAFJ3UV62vqEsXDjaiq5JUUJdFNy4r2Ae0IUwm03boZBJmJtISuvQ3/AG3+gfuxK3gD/gdTtosbOuBC4dz7uXee/yYUals+9sorKyurW8UN0tb2zu7e+b+QVNGicCkgSMWibaPJGGUk4aiipF2LAgKfUZa/vA281uPREga8Qc1jokboj6nPYqR0pJnHlujMwtiFkkioIqgNfIcC6oB4hmtWp5Ztiv2FHCZODkpgxx1z/zpBhFOQsIVZkjKjmPHyk2RUBQzMil1E0lihIeoTzqachQS6abTRybwVCsB7EVCF1dwqv6dSFEo5Tj0dWeI1EAuepn4n9dJVO/aTSmPE0U4ni3qJSz7OEsFBlQQrNhYE4QF1bdCPEACYaWzm9sSyOy0SUkH4yzGsEya1YpzWbm4r5ZrN3lERXAETsA5cMAVqIE7UAcNgMETeAGv4M14Nt6ND+Nz1low8plDMAfj6xdnlJ+i</latexit>
MixMatch
<latexit sha1_base64="x1xnCbbiDjzLbxwhS5goN4KMDBg=">AAACEHicbVDLSsNAFJ34rPUV7dLNYBFclaSCuiy6cVOoYB/QhjKZTNqhk0mYmYgh5Cf8Abf6B+7ErX/gD/gdTtIsbOuBgcM59865HDdiVCrL+jbW1jc2t7YrO9Xdvf2DQ/PouCfDWGDSxSELxcBFkjDKSVdRxcggEgQFLiN9d3ab+/1HIiQN+YNKIuIEaMKpTzFSWhqbtVHxRyqIl7XpUxspPB2bdathFYCrxC5JHZTojM2fkRfiOCBcYYakHNpWpJwUCUUxI1l1FEsSITxDEzLUlKOASCctgjN4phUP+qHQjytYqH83UhRImQSungyQmsplLxf/84ax8q+dlPIoVoTjeZAfM6hCmDcBPSoIVizRBGFB9a0QT5FAWOm+FlI8mZ+WVXUx9nINq6TXbNiXjYv7Zr11U1ZUASfgFJwDG1yBFrgDHdAFGCTgBbyCN+PZeDc+jM/56JpR7tTAAoyvXw/KnbU=</latexit>
<latexit sha1_base64="a/0ULtWisjZicns+6kscEiAmNMs=">AAACWHicbZDLbtNAFIZPDPSSQpuWJZsRUdVUKpYNiLJBqoAFy6ImbaU4SseTk2SU8Yw1cwxElh+Ox4AHgC28AePUC3o50ki//nOdL82VdBRFP1rBg4eP1tY3Nttbj59s73R2986dKazAgTDK2MuUO1RS44AkKbzMLfIsVXiRLj7U+YsvaJ00uk/LHEcZn2k5lYKTt8adYaJkNi77LLFyNidurfnKooolhN+oPJtzm6OuevkR6x+yd439UVouWE+GGB6xqyuj8cXc0MHBIZv4i61Mi3p4Ne50ozBaBbsr4kZ0oYnTcedXMjGiyFCTUNy5YRzlNCq5JSkUVu2kcJhzseAzHHqpeYZuVK4gVGzfOxM2NdY/TWzl/t9R8sy5ZZb6yozT3N3O1eZ9uWFB07ejUuq8INTietG0UIwMq4n6P1sUpJZecGGlv5UJz40L8txvbJm4+rSq7cHEtzHcFecvw/hN+Orz6+7J+wbRBjyD59CDGI7hBD7BKQxAwHf4DX/gb+tnAMF6sHldGrSanqdwI4K9f4/gtPM=</latexit>
$T \to 0$
$\lim_{T \to 0} \mathrm{Sharpen}(p, T)$ = Dirac (i.e., “one-hot”) distribution
Berthelot, David, et al. "Mixmatch: A holistic approach to semi-supervised learning." arXiv preprint arXiv:1905.02249 (2019).
Self-training with Noisy Student
improves ImageNet classification YouTube Video
Xie, Qizhe, et al. "Self-training with noisy student improves imagenet classification." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern
Recognition. 2020.
FixMatch: Simplifying Semi-Supervised
Learning with Consistency and Confidence YouTube Video
<latexit sha1_base64="VngCmXqjYAz+t3ON0R4E1CaC6ss=">AAACv3icbVFNb9QwEJ2Ej5bla4ET4mKxQioHVkmRCuJU4MIBoaKy3ZV2V5XjOFurjh3ZzopVtD+0f6C/g5c0SLRlJNtv3pvxjGaySisfkuQiiu/cvXd/Z/fB4OGjx0+eDp89P/G2dkJOhNXWzTLupVZGToIKWs4qJ3mZaTnNzr+2+nQtnVfW/AqbSi5LvjKqUIIHUHbIaUGBJP3G3dAxUEmK3gHVVMFztIbvgXJi9B0vB2fAGVqB2UPkMfi39KnX110Wh/o3psarwWRQdP9TDj/gMNwWt0LdCnm2y2edUsLL2xz2Eh2wvp8CrIPG8asAsz0djpJx0hm7DdIejKi3o9Ph5SK3oi6lCUJz7+dpUoVlw11QQsvtYFF7WXFxzldyDmh4Kf2y6Wa9ZW/A5KywDscE1rH/ZjS89H5TZogseTjzN7WW/J82r0PxcdkoU9VBGnFVqKg1C5a1i2O5clIEvQHgwin0ysQZd1wErPdaldy3rW0HGEx6cwy3wcn+OD0Yv/+5Pzr80o9ol17Ra6w2pQ90SN/oiCYkor3oRzSNZvHneBWbuLoKjaM+5wVds3jzB8CMt0Q=</latexit>
Pseudo-labeling
<latexit sha1_base64="yA5HrAlxfzslA3Phj/6GWXStwF4=">AAACF3icbVDLSsNAFJ3UV62vqDvdDBbBjSWpoC6LblxWsA9oQ5lMbtqhkwczE6GEgr/hD7jVP3Anbl36A36HkzQL23rgwuGc++K4MWdSWda3UVpZXVvfKG9WtrZ3dvfM/YO2jBJBoUUjHomuSyRwFkJLMcWhGwsggcuh445vM7/zCEKyKHxQkxicgAxD5jNKlJYG5lE/35EK8KZNCYkXnXPiZuuGA7Nq1awceJnYBamiAs2B+dP3IpoEECrKiZQ924qVkxKhGOUwrfQTCTGhYzKEnqYhCUA6aX5/ik+14mE/ErpChXP170RKAikngas7A6JGctHLxP+8XqL8aydlYZwoCOnskJ9wrCKcBYI9JoAqPtGEUMH0r5iOiCBU6djmrngye21a0cHYizEsk3a9Zl/WLu7r1cZNEVEZHaMTdIZsdIUa6A41UQtR9IRe0Ct6M56Nd+PD+Jy1loxi5hDNwfj6BbWAoLs=</latexit>
predefined threshold
<latexit sha1_base64="2kzjvgl8PW5kQi/L5o9NJ2Pqzy0=">AAACEXicbVDLSsNAFJ34rPUVHzs3g0VwVZIK6rLoxmUF+4A2lMnkphk6mYSZiVBLv8IfcKt/4E7c+gX+gN/hpM3Cth64cDjnXO7l+ClnSjvOt7Wyura+sVnaKm/v7O7t2weHLZVkkkKTJjyRHZ8o4ExAUzPNoZNKILHPoe0Pb3O//QhSsUQ86FEKXkwGgoWMEm2kvn1s0gGEZjvAOpKgooQHfbviVJ0p8DJxC1JBBRp9+6cXJDSLQWjKiVJd10m1NyZSM8phUu5lClJCh2QAXUMFiUF54+n3E3xmlACHiTQjNJ6qfzfGJFZqFPsmGRMdqUUvF//zupkOr70xE2mmQdDZoTDjWCc4rwIHTALVfGQIoZKZXzGNiCRUm8LmrgQqf21SNsW4izUsk1at6l5WL+5rlfpNUVEJnaBTdI5cdIXq6A41UBNR9IRe0Ct6s56td+vD+pxFV6xi5wjNwfr6BZbGnfk=</latexit>
→ one-hot (hard-label)
<latexit sha1_base64="miMLfLHzKjf0YAAxK9tSx/v7P4Y=">AAACJHicbZA7TgMxEIa9PEN4BShpLCJEKIh2AQFlBA1lkAhBSqLI650kFl57Zc8C0SpH4BpcgBZuQIcoaCg5B05IweuXLP36Z0Yz/sJECou+/+ZNTE5Nz8zm5vLzC4tLy4WV1QurU8OhxrXU5jJkFqRQUEOBEi4TAywOJdTDq5NhvX4NxgqtzrGfQCtmXSU6gjN0Ubuw1TSi20NmjL6hTYRbzLSCnZ5GWuoxE+1IFoLcHrQLRb/sj0T/mmBsimSsarvw0Yw0T2NQyCWzthH4CbYyZlBwCYN8M7WQMH7FutBwVrEYbCsbfWhAN10S0Y427imko/T7RMZia/tx6Dpjhj37uzYM/6s1UuwctTKhkhRB8a9FnVRS1HRIh0bCAEfZd4ZxI9ytlDsMjKNj+GNLZIenDfIOTPAbw19zsVsODsp7Z/vFyvEYUY6skw1SIgE5JBVySqqkRji5Iw/kkTx5996z9+K9frVOeOOZNfJD3vsnE0qliw==</latexit>
<latexit sha1_base64="i2qpu0ApcFJdV3ZYsy9cMusXoOc=">AAACYnicbVHLSiNBFK20z2lfUZfjojAICtp0R9BZyszGpYJRIR3C7arbSWF1VVNVrYSQL5wvcC+zd+tsrI5Z+LpQcDj33NeprJTCujh+bARz8wuLS8s/wpXVtfWN5ubWtdWVYdhhWmpzm4FFKRR2nHASb0uDUGQSb7K7P3X+5h6NFVpduVGJvQIGSuSCgfNUv4mp0kJxVI6iYr4rDNDSQnOU1DfigtU6S52mGVKpH4681OhyRPdFhNEhHYrB8IhplQvfhOFBmoZa0UpJyFAipxwchP1mK47iadCvIJmBFpnFRb/5L+WaVYUfxiRY203i0vXGYJxgEidhWlksgd35bbseKijQ9sZTOyZ0zzOc5tr45++asu8rxlBYOyoyryzADe3nXE1+l+tWLv/VGwtVVs7f+jYor2RtTu0t5cIgc3LkATAj/K6UDcEAc/4HPkzhtl5tUhuTfLbhK7huR8lJdHzZbp39nlm0TH6SXbJPEnJKzsg5uSAdwshf8kxeyP/GUxAGm8H2mzRozGq2yYcIdl4BlEK5qw==</latexit>
$L$ → number of classes
<latexit sha1_base64="K2YcH4DPipzgdJ7BQbSLG85xAKI=">AAACI3icbVA5TgMxFPWwE7YAJY1FBKKKZgABJYKGgiJIZJGSKPI4fxILjz2y/wDRKDfgGlyAFm5Ah2goaDkHzlIA4UmWnt77m1+YSGHR9z+8qemZ2bn5hcXc0vLK6lp+faNidWo4lLmW2tRCZkEKBWUUKKGWGGBxKKEa3pwP/OotGCu0usZeAs2YdZSIBGfopFZ+95I2jOh0kRmj72gD4R4zlcYhGKojyiWzFmy/lS/4RX8IOkmCMSmQMUqt/FejrXkag8LhjHrgJ9jMmEHBJfRzjdRCwvgN60DdUcVisM1s+J8+3XFKm0bauKeQDtWfHRmLre3FoauMGXbtX28g/ufVU4xOmplQSYqg+GhRlEqKmg7CoW1hgKPsOcK4Ee5WyrvMMI4uwl9b2nZwWj/nggn+xjBJKvvF4Kh4cHVYOD0bR7RAtsg22SMBOSan5IKUSJlw8kCeyDN58R69V+/Nex+VTnnjnk3yC97nN9jnpYI=</latexit>
on unlabeled data
FixMatch
<latexit sha1_base64="H/m3o+EUyXykbSJTZAxpMP5TnWc=">AAACEHicbVDNSsNAGNzUv1r/oj16WSyCp5JUUI9FQbwIFWwttKFsNpt26WYTdjdiCHkJX8CrvoE38eob+AI+h5s0B9s6sDDMfN/Ox7gRo1JZ1rdRWVldW9+obta2tnd298z9g54MY4FJF4csFH0XScIoJ11FFSP9SBAUuIw8uNOr3H94JELSkN+rJCJOgMac+hQjpaWRWR8Wf6SCeNk1fbpFCk9GZsNqWgXgMrFL0gAlOiPzZ+iFOA4IV5ghKQe2FSknRUJRzEhWG8aSRAhP0ZgMNOUoINJJi+AMHmvFg34o9OMKFurfjRQFUiaBqycDpCZy0cvF/7xBrPwLJ6U8ihXheBbkxwyqEOZNQI8KghVLNEFYUH0rxBMkEFa6r7kUT+anZTVdjL1YwzLptZr2WfP0rtVoX5YVVcEhOAInwAbnoA1uQAd0AQYJeAGv4M14Nt6ND+NzNloxyp06mIPx9QsEcZ2u</latexit>
$x_b$ → training example
<latexit sha1_base64="5pgG+tp7SG4PwCgXo4H8ZmoSmP4=">AAACJHicbVC7TgJBFJ31Lb5QS5uJxGhFdtWopdHGUhMBEyDk7nCBibOzm5m7CtnwCf6GP2Crf2BnLGws/Q4HpFDwJJOcnHNfc8JESUu+/+FNTc/Mzs0vLOaWlldW1/LrG2Ubp0ZgScQqNjchWFRSY4kkKbxJDEIUKqyEt+cDv3KHxspYX1MvwXoEbS1bUgA5qZHf7TZCXjOy3SEwJr7nNcIuZWRAaqnbHLsQJQr7jXzBL/pD8EkSjEiBjXDZyH/VmrFII9QkFFhbDfyE6hkYksLNy9VSiwmIW2hj1VENEdp6NvxQn+84pclbsXFPEx+qvzsyiKztRaGrjIA6dtwbiP951ZRaJ/VM6iQl1OJnUStVnGI+SIc3pUFBqucICCPdrVx0wIAgl+GfLU07OK2fc8EE4zFMkvJ+MTgqHlwdFk7PRhEtsC22zfZYwI7ZKbtgl6zEBHtgT+yZvXiP3qv35r3/lE55o55N9gfe5zdVzqZV</latexit>
$\ell_s$ → supervised loss
<latexit sha1_base64="HOQdjjOpUIomBao3oKBnlJXFjPs=">AAACJnicbVDLSgNBEJz1GeMr6tHLYBD0EnZV1GPQi0cFo0I2hNlJJxmc3Vmme6NhyTf4G/6AV/0DbyLePPkd7sYcNLFgoKjqpnoqiLVCct0PZ2p6ZnZuvrBQXFxaXlktra1foUmshJo02tibQCBoFUGNFGm4iS2IMNBwHdye5v51DywqE11SP4ZGKDqRaispKJOapV0ftG4i963qdElYa+64T3BPKSYx2J5CaHFtEAfNUtmtuEPwSeKNSJmNcN4sffktI5MQIpJaINY9N6ZGKiwpqWFQ9BOEWMhb0YF6RiMRAjbS4ZcGfDvJg9vGZi8iPlR/b6QiROyHQTYZCuriuJeL/3n1hNrHjVRFcUIQyZ+gdqI5GZ73w1vKgiTdz4iQVmW3ctkVVkjKWvyT0sL8tEExK8Ybr2GSXO1VvMPK/sVBuXoyqqjANtkW22EeO2JVdsbOWY1J9sCe2DN7cR6dV+fNef8ZnXJGOxvsD5zPb0hfp14=</latexit>
$p_b$ → one-hot labels
<latexit sha1_base64="CbobAbbQiLiItlmuEqE7RzMAJUg=">AAACInicbVDJSgNBEO1xN25Rj14ag+DFMKOiHoNePCqYBZIQejo1mcae7qG7Rg1DvsDf8Ae86h94E0+CZ7/DznJwe1DweK+KqnphKoVF33/3pqZnZufmFxYLS8srq2vF9Y2a1ZnhUOVaatMImQUpFFRRoIRGaoAloYR6eH029Os3YKzQ6gr7KbQT1lMiEpyhkzrFnbQT0pYRvRiZMfqWthDuMNcK9mKNe5KFIO2gUyz5ZX8E+pcEE1IiE1x0ip+truZZAgq5ZNY2Az/Fds4MCi5hUGhlFlLGr1kPmo4qloBt56N3BnTHKV0aaeNKIR2p3ydylljbT0LXmTCM7W9vKP7nNTOMTtq5UGmGoPh4UZRJipoOs6FdYYCj7DvCuBHuVspjZhhHl+CPLV07PG1QcMEEv2P4S2r75eCofHB5WKqcTiJaIFtkm+ySgByTCjknF6RKOLknj+SJPHsP3ov36r2NW6e8ycwm+QHv4wsuAKUr</latexit>
$\ell_u$ → unsupervised loss
<latexit sha1_base64="wjETlS/KV7VX6eqZAn/gpkQGEPw=">AAACKHicbVDLSgMxFM34rPVVdekmWARBKDMq6lJ047KC1UKnlEx62wYzyZDcqGXoR/gb/oBb/QN34taF3+FM7cK2HggczrmXc3OiRAqLvv/pzczOzS8sFpaKyyura+uljc0bq53hUONaalOPmAUpFNRQoIR6YoDFkYTb6O4i92/vwVih1TX2E2jGrKtER3CGmdQq7YcgZcvR0IhuD5kx+oGGCI+YOmVdAuZeWGhTqa0dtEplv+IPQadJMCJlMkK1VfoO25q7GBRyyaxtBH6CzZQZFFzCoBg6Cwnjd6wLjYwqFoNtpsNPDeiuy4M72mRPIR2qfzdSFlvbj6NsMmbYs5NeLv7nNRx2TpupUIlDUPw3qOMkRU3zhmhbGOAo+xlh3IjsVsp7zDCOWY9jKW2bnzYoZsUEkzVMk5uDSnBcObw6Kp+djyoqkG2yQ/ZIQE7IGbkkVVIjnDyRF/JK3rxn79378D5/R2e80c4WGYP39QMjDKhX</latexit>
→ pseudo-label
<latexit sha1_base64="rC/rKUQSRmAB29FP0uOj57iGBvg=">AAACHHicbVDLSgNBEJyN7/iKevTgYBC8GHZV1KPoxWME84AkhNnZTjJkdmeZ6VXDkqO/4Q941T/wJl4Ff8DvcPI4mMSChqKqm+4uP5bCoOt+O5m5+YXFpeWV7Ora+sZmbmu7bFSiOZS4kkpXfWZAighKKFBCNdbAQl9Cxe9eD/zKPWgjVHSHvRgaIWtHoiU4Qys1c3t1LdodZFqrB1pHeMQ0NpAE6kgyH2S/mcu7BXcIOku8McmTMYrN3E89UDwJIUIumTE1z42xkTKNgkvoZ+uJgZjxLmtDzdKIhWAa6fCRPj2wSkBbStuKkA7VvxMpC43phb7tDBl2zLQ3EP/zagm2LhqpiOIEIeKjRa1EUlR0kAoNhAaOsmcJ41rYWynvMM042uwmtgRmcFo/a4PxpmOYJeXjgndWOLk9zV9ejSNaJrtknxwSj5yTS3JDiqREOHkiL+SVvDnPzrvz4XyOWjPOeGaHTMD5+gX1kaMB</latexit>
Consistency Regularization
<latexit sha1_base64="lGcpTrxly2mD6OOFbIy56UTToZQ=">AAACInicbVDLTgIxFO3gC/GFunTTSEhckRlM1CWRjUs08kiAkE7nAg2dzqTtmIwTvsDf8Afc6h+4M65MXPsddmAWAp6kycm55z563JAzpW37y8qtrW9sbuW3Czu7e/sHxcOjlgoiSaFJAx7IjksUcCagqZnm0AklEN/l0HYn9bTefgCpWCDudRxC3ycjwYaMEm2kQbHcm81IJHjTeiCU2QiCxvgORhEnkj1mvpJdsWfAq8TJSAllaAyKPz0voJEPQlNOlOo6dqj7CZGaUQ7TQi9SEBI6ISPoGiqID6qfzE6Z4rJRPDwMpHlC45n6tyMhvlKx7xqnT/RYLddS8b9aN9LDq37CRBiln5wvGkYc6wCn2WCPSaCax4YQKpm5FdMxkYRqk+DCFk+lp00LJhhnOYZV0qpWnIvK+W21VLvOIsqjE3SKzpCDLlEN3aAGaiKKntALekVv1rP1bn1Yn3Nrzsp6jtECrO9fRwel0A==</latexit>
→ both $\alpha$ and $p_m$ are stochastic functions, so the two terms in this equation will indeed have different values
<latexit sha1_base64="/gjf0Ur+Z8/XeuFTVkdA3ZN3+zM=">AAACjHicbVFdb9MwFHXCx0aBrcDjXizaSTygKoFpICGkCSTE45DoNqmpqhvnprHm2Jl901JF+Q37ffsD/APecdo+sI0rWT465375OK2UdBRFN0H44OGjxzu7T3pPnz3f2++/eHnmTG0FjoVRxl6k4FBJjWOSpPCisghlqvA8vfza6ecLtE4a/ZNWFU5LmGuZSwHkqVn/OrFyXhBYa5Y8IfxFTWqo4MMEVFXAkIPO+LCabaSy9YRF7siIAhxJwfNai66Ve8ud4VQgp6W/0ZaOS+0J6The1etxfCmV8myGmPECFsgzmedoURNfgKrRtbP+IBpF6+D3QbwFA7aN01n/d5IZUZe+h1Dg3CSOKpo2YP1yCtteUjusQFzCHCceaijRTZu1cS0/9EzGc2P98Tus2X8rGiidW5WpzyyBCndX68j/aZOa8o/TRuqqJtRiMyivFSdvjf8F/26LgtTKAxBWdkZ6Qy0Ib9ztKZnrVmt73pj4rg33wdm7UXw8ev/jaHDyZWvRLjtgr9kbFrMP7IR9Z6dszAT7ExwEw+Aw3AuPwk/h501qGGxrXrFbEX77C267yC4=</latexit>
Sohn, Kihyuk, et al. "Fixmatch: Simplifying semi-supervised learning with consistency and confidence." arXiv preprint arXiv:2001.07685 (2020).
Training Data-Efficient Image Transformers
& Distillation Through Attention YouTube Video
<latexit sha1_base64="I56gSfpm7wq++2xjx5UfI4O5+SI=">AAACM3icbVDLSiNBFK32MTrRGaOznE0xQdDFhG4D0WVQRxzBx4BJhBjC7eobU6S6uqm6LYaQP/E3/AG3+gMyOxF3/sNUHovxcaDgcM693FMnTJW05PsP3tT0zOynufnPuYXFL1+X8ssrNZtkRmBVJCoxZyFYVFJjlSQpPEsNQhwqrIfdnaFfv0RjZaJPqZdiM4YLLdtSADmplS//dgIeIfFdIODHWvX42lHCf10RGg1qLNtMdDhYfrB3+rPk+4frrXzBL/oj8PckmJACm+CklX8+jxKRxahJKLC2EfgpNftgSAqFg9x5ZjEF0XVhGo5qiNE2+6P/DfiqUyLeTox7mvhI/X+jD7G1vTh0kzFQx771huJHXiOj9lazL3WaEWoxPtTOFKeED8vikTQoyFUSSRBGuqxcdMCAcOW8vhLZYbRBzhUTvK3hPaltFINysfRno1DZnlQ0z76zH2yNBWyTVdg+O2FVJtg1u2V37N678f56j97TeHTKm+x8Y6/gvfwDI3mosw==</latexit>
<latexit sha1_base64="IybaAzzAudkwjY2vkSQdiV6jIEo=">AAACPXicbVDLSgMxFM34rPVVdekmWAQFGWYqqLiqduNGUbE+qKVkMrdtMJMMyR2hlP6Ov+EPuFXwA3Qlbt2a1i58XQgczj33npsTpVJYDIJnb2R0bHxiMjeVn56ZnZsvLCyeW50ZDlWupTaXEbMghYIqCpRwmRpgSSThIrqp9PsXt2Cs0OoMOynUE9ZSoik4Q0c1CuUzYLwNhh7qGOQu3aMWjVYtKpwQKJfMWid3gjXwW/4GPYXWEeAVrWh168B6o1AM/GBQ9C8Ih6BIhnXcKLxex5pnCSgcbK+FQYr1LjMouIRe/jqzkDJ+4+xrDiqWgK13Bz/t0VXHxLSpjXsK6YD9PtFlibWdJHLKhGHb/u71yf96tQybO/WuUGmGoPiXUTOTFDXtx0ZjYYCj7DjAuBHuVsrbzDCOLtwfLrHtn9bLu2DC3zH8BeclP9zyN09KxfL+MKIcWSYrZI2EZJuUyQE5JlXCyR15II/kybv3Xrw37/1LOuINZ5bIj/I+PgGkka2b</latexit>
Hard-label Distillation
<latexit sha1_base64="wmog2ozQiFkgesKiD8zN9eZ5FP8=">AAACH3icbVDLSsNAFJ3UV62vqks3Q4vgxpJUUJdFXXRZwT6gDWUyuWmHTh7MTIQSuvc3/AG3+gfuxG1/wO9wkmZhWw8MHM65d+7hOBFnUpnm3ChsbG5t7xR3S3v7B4dH5eOTjgxjQaFNQx6KnkMkcBZAWzHFoRcJIL7DoetM7lO/+wxCsjB4UtMIbJ+MAuYxSpSWhuXKIPsjEeDOmkS4l5w4wPGDvsw4z4eqZs3MgNeJlZMqytEaln8GbkhjHwJFOZGyb5mRshMiFKMcZqVBLCEidEJG0Nc0ID5IO8lyzPC5VlzshUK/QOFM/buREF/Kqe/oSZ+osVz1UvE/rx8r79ZOWBDFCgK6OOTFHKsQp8Vglwmgik81IVQwnRXTMRGEKl3f0hVXptFmJV2MtVrDOunUa9Z17eqxXm3c5RUV0RmqoAtkoRvUQE3UQm1E0Qt6Q+/ow3g1Po0v43sxWjDynVO0BGP+C89po+w=</latexit>
Use a lower training resolution and fine-tune the network at a larger resolution
<latexit sha1_base64="DYB4cfkStGw8y6alA9g/Tp/ah7M=">AAACV3icbVDLSgNBEJysrxhfUY9eBoPgxbCroB6DXjwqGBWTEHpnO8mQ2ZllplcJIf/mb/gDXvUPdDYGfDYMFNVdVE3FmZKOwvC5FMzNLywulZcrK6tr6xvVza0bZ3IrsCmMMvYuBodKamySJIV3mUVIY4W38fC82N8+oHXS6GsaZdhJoa9lTwogT3Wr921tpE5QE2865MCVeUTLyYLUUve5RWdUXtxy0AnveZsDyjVyGiDXSI/GDjlQIQTb98ovQbdaC+vhdPhfEM1Ajc3mslt9aSdG5KkPIxQ414rCjDpjsCSFwkmlnTvMQAyhjy0PNaToOuNpBxO+5xmfz1j//Gem7HfFGFLnRmnsL1Oggfu9K8j/dq2ceqedsdRZTqjFp1EvV5wMLwrlibQoSI08AGGlz8rFACwI8rX/cElcEW1S8cVEv2v4C24O69Fx/ejqsNY4m1VUZjtsl+2ziJ2wBrtgl6zJBHtiL+yVvZWeS+/BYlD+PA1KM802+zHB5gdF1beH</latexit>
$\tau$ → temperature
<latexit sha1_base64="DnsbErU1x04JEN1mEbJKm9PLs/o=">AAACIHicbVBLSgNBFOzxG+Mv6tJNYxBchRkVdRl04zKC+UAmhDedl6Sx50P3GzUMOYDX8AJu9QbuxKUewHPY+SzUWNBQVNXjva4gUdKQ6344c/MLi0vLuZX86tr6xmZha7tm4lQLrIpYxboRgEElI6ySJIWNRCOEgcJ6cHMx8uu3qI2Mo2saJNgKoRfJrhRAVmoXij5Byn0te30CreM77hPeU0YYJqiBUo1Dm3JL7hh8lnhTUmRTVNqFL78TizTEiIQCY5qem1ArA01SKBzm/dRgAuIGeti0NIIQTSsbf2bI963S4d1Y2xcRH6s/JzIIjRmEgU2GQH3z1xuJ/3nNlLpnrUxGSUoYicmibqo4xXzUDO9IjYLUwBIQWtpbueiDBkG2v19bOmZ02jBvi/H+1jBLaocl76R0dHVcLJ9PK8qxXbbHDpjHTlmZXbIKqzLBHtgTe2YvzqPz6rw575PonDOd2WG/4Hx+A3U/pOA=</latexit>
<latexit sha1_base64="sIyvJKDX/GCs7uXU7xAzTb4BqEQ=">AAACU3icbVBNT9tAEN0Y2tK0KQGOvayaVEovkQ1S2yOCC1IlBBL5kJIoWq8n8SrrtdkZI4Uo/4y/wYErh17gH3BhHXwopE8a6enNjObNCzOtkHz/ruJtbL57/2HrY/XT59qX7frObhfT3EroyFSnth8KBK0MdEiRhn5mQSShhl44Oy76vSuwqFJzQfMMRomYGjVRUpCTxvXu0KTKRGCI/wHIOMXAlZsBngmSMUd1DbhSUSTAm0OVOFuATd48bfIWwmUORgLXYKYU/+AyFmYKOK43/La/Al8nQUkarMTZuP53GKUyT5wRqQXiIPAzGi2EJSU1LKvDHCETcuacDRw1zgyOFqv/l/y7UyI+Sa0r98hK/XdjIRLEeRK6yURQjG97hfi/3iCnye/RQpksJ/fly6FJrjmlvAiTR8qCJD13REirnNciACskuchfXYmwsLasumCCtzGsk+5+O/jZPjjfbxwelRFtsa/sG2uxgP1ih+yEnbEOk+yG3bMH9li5rTx5nrf5MupVyp099gpe7RlpNbPo</latexit>
Keep the image patch sizes the same $\Longrightarrow$ $N$ (sequence length) changes
$Z_s$, $Z_t$ → student and teacher logits
<latexit sha1_base64="Z4vU5O1jbDhO/yl7c1/iCAOO4fs=">AAACM3icbVDLSgMxFM34tr6qLt0Ei+BCyoyKuhTduFSwKrZluJO5bYOZyZDcUcvQP/E3/AG3+gPiTsSd/2Bau/B1IHA493VyokxJS77/7I2Mjo1PTE5Nl2Zm5+YXyotLZ1bnRmBNaKXNRQQWlUyxRpIUXmQGIYkUnkdXh/36+TUaK3V6St0Mmwm0U9mSAshJYXnnMrQb/DIk3jCy3SEwRt/wBuEtFZbyGFPikMacEEQHDVe6Lcn2wnLFr/oD8L8kGJIKG+I4LL83Yi3yxO0TCqytB35GzQIMSaGwV2rkFjMQV9DGuqMpJGibxeB/Pb7mlJi3tHHP+Rmo3ycKSKztJpHrTIA69netL/5Xq+fU2msWMs1ywlR8HWrlipPm/bB4LA0KUl1HQBjpvHLRAQOCXKQ/rsS2b61XcsEEv2P4S842q8FOdetku7J/MIxoiq2wVbbOArbL9tkRO2Y1Jtgde2CP7Mm79168V+/tq3XEG84ssx/wPj4BhFOr9w==</latexit>