commit 4faabe5409302bbb175801cbfdc317777d86f002 Author: Sebastian Seedorf Date: Tue Nov 10 18:31:01 2020 +0100 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4139faf --- /dev/null +++ b/.gitignore @@ -0,0 +1,404 @@ + + + +# Created by https://www.toptal.com/developers/gitignore/api/intellij,latex +# Edit at https://www.toptal.com/developers/gitignore?templates=intellij,latex + +### Intellij ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/* + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### Intellij Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +### LaTeX ### +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 +.*.lb + +## Intermediate documents: +*.dvi +*.xdv +*-converted-to.* +# these rules might exclude image files for figures etc. 
+# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +# fixme +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) 
+# *.ist + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hyperref +*.brf + +# knitr +*-concordance.tex +# TODO Comment the next line if you want to keep your tikz graphics files +*.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +# easy-todo +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# TeXnicCenter +*.tps + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz + +# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib +# option is specified. Footnotes are the stored in a file with suffix Notes.bib. +# Uncomment the next line to have this generated file ignored. 
+#*Notes.bib + +### LaTeX Patch ### +# LIPIcs / OASIcs +*.vtc + +# glossaries +*.glstex + +# End of https://www.toptal.com/developers/gitignore/api/intellij,latex + + +!out/.gitkeep diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..ddda9a7 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,8 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..1763e15 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..6cd59d1 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/thesis.iml b/.idea/thesis.iml new file mode 100644 index 0000000..0dde4f4 --- /dev/null +++ b/.idea/thesis.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/data/Darknet.drawio b/data/Darknet.drawio new file mode 100644 index 0000000..1e1f72d --- /dev/null +++ b/data/Darknet.drawio @@ -0,0 +1 @@ 
+7V1tk9q2Fv41zGw+JOMXbMPHsJtNem+aZrK9t+2njgEtuGssasyy9NdXkt9kSYDxGpmFk8kk1pEsyTov0qOjI3r27eLlc+wv5z/jKQp7ljF96dl3Pcsy+5bVo3+N6TalDEwzJcziYJoVKgkPwT8oIxoZdR1M0apSMME4TIJllTjBUYQmSYXmxzHeVIs94rDa6tKfIYnwMPFDmfpbME3m2VdYXkn/goLZPGvZMoys4ws/L5wRVnN/ijccyf7Us29jjJP0afFyi0I6ePm4pO/d78gtOhajKKnzAvJ+srd/xMNw+G2y/vLTZur+59v7flrLsx+usw/OOpts8xGYJ4uQPJk9e4Si6Uc6piQZ4QillPsgpPkGSZFPXNK3wiB6Ikm5h1mn0bQy6Fl/PyO8QEm8JQU25VA72fDNuVHOaTEK/SR4rrLKzzg+K6orWviOA9ITy8iks5/zKpfNoVGtYoXX8QRlb/FDe6AiS6gn8eMZSqR6yAP31SWJMe4IJjrXzESrOvT2oCUeShWdmImugoluSMZqtCYPs4SxIiWMc8KdHz9FKLnF0XORF4ulSW+KF24egzBB8apn3VL5oNY2fUrizMyaKWHsJ5P5nxGOF4T2a7xG77jquA4JYpagF0rnpI3UjJ/QLQ5xXAoc6UUokPwwmEUkOSFihgh99IziJCB2+GOWsQimU9rMaDMPEvSw9Ce0zQ2ZdQgtxutoiqaZBO8UWVonetkrtIU0VIQhT3IibVsKmbaN3eJbEZhjpcNrV8UrTOmRbrM/LMePk/zdcYgnT0XxfPqjDfjRZI5jpgt3QUzm3QBHWdYEL4JJ1s5fwWzGumfuZcvZWJIq002zoSkRKzqdJdl8+8t4DH/ZjqO/jdl/rdnHLw/Oe1MhK4LhoBbDulNZCNmOHLAZ787aCEgarxDAukbAtDRaASVnrYuxAoLK1+JKR1ag6aJQqsjVawbkpf2GJHu3dm/o0aHjEq6/oAoUjVf0v6lqLSGIWalr5g5t5ASxBVUcqi0qJxJDnYoor7h3jq3xKA8nKXxvicOuZsa8ftG8ndXSjyrccv9eU5BJlJKp+keqJLOxf2PQbjJbrnx6Rx9ptQQ1R8n7R38RhNv09QWO8Irxu1KETQ6sgLF8KdstpcgRdwocMuiUymBwkcqZ4DA2EModfaYdc+i4O4R3h8qaRdlcyhpVY5XVpMwucug/dLwJgc6rDvtGmhjRpfM3unLmS8R8ET6H9WMs5qZNpiwsmmQTKy3BdIuSTZbkDTmlMmtPM8optkJOp1lKyiZaSqxOtTQzm2xpJqfgNIepOKXnZoASDaHzmb7X4UiRVTCjtAAOtQFFyX7JPGPLscng6KlRKPJsjuG5dSgzuTxiJQr6jGtfFBWWLORlv/zYe+VHyWWBucUsXmFhPpOXAy8KAa0rn9HT/GJOp9nprF6++FsxaGmzyrm9zE7n97L1bI5nBURBYHN9C2Ig8NXZw1Y+r1gPqLqwj+dFL7IpuiqJFZHjRdHbIVKVdYPKAB3XTH9HM5VVxU7h5Qa2gVT3j5dq3lKkIrTLpgjyzzV1CoMyrDG8guANddgTRqzOijnr0umTA2/phJ/OsY/ntlQrlr1drdVUWycCHOam7Dq7aFeDeAsI0xniHQDi1Y542QzWBuItxEkT4h0eFpaOjaFqI1mrMcy3NPdZw6/If9r+QF//B9awyjzFroNea5j7zsEcajSHttOSObRP5xZWS4t19vbQVmyp67WH9uExyh3ik3UcbkexP3lCyeHBqo7sYxgs/5+pA33+0t6Q2tLhA3lMVVbKFOW6vUGVt0d5f/L95c4YtjhluI7Ei76CF6JlaI8V8gI69duxcZbm9yJP1gKZR/XHPkar4B9/zKqiI72kJo99qjPqOXe0rnWC093j9tSiwgkVDHV1WhpLddSq5gGPH2T8pms/rOewfXMnNdpcpPU7h6xWjQ0HxcGdA074aeoHoo4Z83pc7v3Ol9yWbEAvx43a7xqO2jWWXwB
oXglozOHwg1Ple9MdHkVVmvd4bNVh1zasK7Ws9vVYVkexHtJrWW35xGt9y3p/dlDW6RrK2pdzRvQt2VKn6faQoirNG0S2vGFeXwPPS/tcxbJfq/bl5hW0T6v2uSJDm2tfIUK6zmfW2MzPtx4XLzPq9f8wXi6iDxmRV6h8afHVH6PwO14FGbvHOEnwghQIacbInzzNmKJygvXI/iiWJwleqlYxSxQH5NPp8108x4vxevW9JI3GRRskf+YnaOOTQRrhdRIGERHoPErPKImlDqy2izGm30Qkzg9DFH7etGMtzIHAa1VAh1ZzUSPwrmuT2vUh237LcW1gUmvZQa9p2IvCpGqOfOmrgugEieHEhLH613kQVVWJMushewHHyRzPcOSHn0rqCL0Eye95afL8B2U9+fY0dfeSSQJLbPNEIYzmGxAL0XfZb+wEFeIzXcepJRKER/6WK5a5DHZ2WPSC5R3e1S+xfN6vUiLTHrQrn7u3hg/6AEbpgd3jozyZlK/eXvzm63Y7OgzgVLMeTrKd3mq5bUVwihXpnsdUJ9levclZsQo2H8RJmrAueevz7AI5c3kHa6DTGjSN5JQq0hzJ6cg7BW/YB9k/t1hOSzW6EJ8J8ZnFM8Rn1g2ngvhMiM/MSkN8prIZiM+E+MyzWn91HqDpqA6BtXMQ9loQbfeBmk7LN0kCoq0BRBtHakoVaT7F59Rw1HRtFzuP1XRqHOiarOPnYow4pZqE/mpFpbzi2MrcVsYHL/dVpZ4r1/X2+65IgnPyZ/4s8o18bTSdVmfbdk4o62OpLZ8Sa6yrfWQAmJjXmFJSMT5YsDN9FkWuqZdNrMgUDUNLXjbPUuvIrn6J5fMtuJN62RyVq6XOBk3qKbvcVYPEPUOOTlNJ+8mi05zdfo6D/lCmuhAOdWhlqIqw1bsyzOEFrAw1rgzttnwdtmZfh1vj9trjXZ+2dQUBHmcXCelaoPnaNb9xbJdUkWZM6MrbQG/Zy+ntsKOcVBTHU/Ro4/4LgSW3ZjnydmGCz9Z1adsWeC7BcwmeS/BcNhAD8FyC5/LKPZd2OckLHsuzW0qpbq3QvJSqfwlB5dj+AZRKV2gXf8HL2d3w4ta4NhRQ6itRqtNe3LyiKs1x8568r5GdGN2Npiq5BtV0QFSAqABRAaICRFXmAaICRAWISjuiUt1dpRdRefXPgB6DqEx6fSNdH11TeGP3V7t5cBi0C0jV+DIkRVWaL0PylL+oeN/fC6n4XIOpOmAqwFSAqQBTAaYq8wBTAaYCTKUdU6luJNWMqVRBNq/HVBYN3qHro8EVYSrlhZF6MRVc79wFpmp8G6ICU2m+RcpT/nTF/WAvpuJzDabqgKkAUwGmAkwFmKrMA0wFmAowlX5MpTgsphlT1b+Z8xhM5dDfar02TOUpvI6af13eAEzVBaZqGpyqqEpzeOpAeRXnvenuBVWVbIMpO6AqQFWAqgBVAaoq8wBVAaoCVKUdVXmdh6YPrJOgKtOw+tcHqwad3/sxgF917gJWDZre/KGoSvPdHwP5wCiFVdSG7oFVlWwj1XbAVYCrAFcBrgJcVeYBrgJcBbhKO64adH5PxQB+dFDDBXwDge1tXcDnnegSZ7HDeTsnvZR5ALdsnF4Ui52ctkXRFT12bd0n7qrbOako5t/CieKNccEbRNKPmfbla79V+0Mnu/Z7KO8P3VzynUfSDNE5A+TtlptLjpCW7GLnDKjx+yfHzIZzf0nfCoPo6Q1MU33BIBW/xX7sNOUNDlTUeNuyl0EPrni5Z2d/+hc= \ No newline at end of file diff --git a/data/Darknet.png b/data/Darknet.png new file mode 100644 index 0000000..7409a1c Binary files /dev/null and b/data/Darknet.png differ diff --git a/data/Darknet.svg b/data/Darknet.svg 
new file mode 100644 index 0000000..d6c7aaf --- /dev/null +++ b/data/Darknet.svg @@ -0,0 +1 @@ +
DarknetConv
(filters, size, strides=1, batch_norm=True)
DarknetConv...
Conv2D
(filters, size, strides)
Conv2D...
w × h × d
w × h × d
w × h × f
w/2 × h/2 × %3CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphMof
w × h × f...
BatchNorm
BatchNorm
LeakyReLU
LeakyReLU
batch_norm?
batch_...
stride=1
stride=2
stride=1...
DarknetResidual
DarknetResidual
DarknetConv
(d/2, 1)
DarknetConv...
w × h × d
w × h × d
DarknetConv
(d, 3)
DarknetConv...
w × h × d/2
w × h × d/2
w × h × d
w × h × d
DarknetBlock
(filters, blocks)
DarknetBlock...
DarknetConv
(filters, 3, stride=2)
DarknetConv...
w × h × d
w × h × d
w/2 × h/2 × %3CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphMof
w/2 × h/2 × %3C...
DarknetResidual
DarknetResidual
 × blocks
 × blocks
Darknet-53
Darknet-53
DarknetConv
(32, 3)
DarknetConv...
w × h × d
w × h × d
w × h × 32332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w × h × 32332CmxGr...
DarknetBlock
(64, 1)
DarknetBlock...
w/2 × h/2 × 64332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w/2 × h/2 × 64332C...
DarknetBlock
(128, 2)
DarknetBlock...
w/4 × h/4 × 128332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w/4 × h/4 × 128332...
DarknetBlock
(256, 8)
DarknetBlock...
w/8 × h/8 × 256332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w/8 × h/8 × 256332...
DarknetBlock
(512, 8)
DarknetBlock...
w/16 × h/16 × 512332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w/16 × h/16 × 5123...
DarknetBlock
(1024, 8)
DarknetBlock...
w/32 × h/32 × 1024332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w/32 × h/32 × 1024...
(0)
(0)
(1)
(1)
(2)
(2)
Viewer does not support full SVG 1.1
\ No newline at end of file diff --git a/data/YOLOv3.drawio b/data/YOLOv3.drawio new file mode 100644 index 0000000..92221fa --- /dev/null +++ b/data/YOLOv3.drawio @@ -0,0 +1 @@ +7V1tc6M4Ev41rkpSlS2EjG0+5mWys3c7M3ubupra/XJFbGIzweDFOInn15/EO1LbJgQEsXtqkhhJSHJ3q7uf1tuA3ixffw2s1eKLP7Pdga7NXgf0dqDrZKjrA/5fm23jlMmQxgnzwJklhfKEe+ennSRqSerGmdnrUsHQ993QWZUTp77n2dOwlGYFgf9SLvbou+VWV9bclhLup5Yrp353ZuEi+Rb6OE//bDvzRdKyrmlJx5dWWjhJWC+smf9SSKKfBvQm8P0w/rR8vbFdTryULvF7dztys44FthdWecEe/0a3fwWma36dbj7/9jIb/evr5TCu5dlyN8kXTjobblMKLMKlyz6RAb22vdkVpyl79HzPjlPuHJfna+yJfcUVf8t1vCf2KPcw6bQ9KxE96e+vtr+0w2DLCrzkpDYS8i0KVE7TAtu1Que5zCor4fg8qy5r4Q/fYT3RtUQ6qWHGrySySQxilOtY+5tgaievFWl7oKbRWKgotIK5HUoVsQ+F750nRax7AxuNU2bjUCvTfqLVY+JEO1BRy0wcAUwcuYxW1xv2YR5GrIgTHtKEv779/u2ZZsmBWJB1JCt7lrJjSFhTN0VmZCkzexUVoeeFGgrNC0IV2q88vSBb6zDwn+wb3/WDXLwemWwJSZbrzD32OGVCZbP062c7CB2md6+SjKUzm/Fmrl8WTmjfr6wpb/OFWRmWFvgbb2bPEnndKaC8Tvt1r4jCMpQ+FgSY6oAEU223sJbE462yMG52QJeYMmDdjv5FOVYQpu8+uP70KSuemjvegOVNF34QSf6tEzA76/hekjX1l840aeeHM59H3SN72dITvaEL452QmopDrKg9vfHy9Yf26H7bPnj/aPN/6/Orz/fGJQFkRVATt1bw5NnhpUEhxQCpjxMZ6ERXONJB7uk40pWPdImbdUc6Gakd6rK7/sIeBzd0YI456YoPM0mO8sFEdgy3gqQ1MNYmsFos8DxTuUqGmuwlM+rdUV0goZBCNH3YN1JmctgZLSEHRfRNmRa68b3naibnzCD6+fHaHSKqIMDFzLiqhIMTNDzKDQ9tysVU7WOatVQnG9K905ydW6G0sUOq89smXG3C6sozBvA0/jN1rfU6ipuSE1KqFPDm1SpVAoEx1KrtalXDaEirUsXuvA6F3w+G/AS36kDQj41iNlTX53uLn16Eb9w98K/gQidxmzd40Qm7YytwzLpf4OcEQESK+Sk71DvB+chacqJ4D2v+Z1YhAKfabTKF4TKSyWuqpC6laFhbN6zENH8xynw3a5pWoCrFxpVCk6JN6deL1N0+IQ0LICe1GpbKM6S7w5+PfVOhEyDio1aF4qRiFyp0Mm5MhWYipEqFgmGfHQNOv+jdkDMBJ1/pkEv5hUNO6ZAz6wYEgKp0tUNuuBsV7g8JlMOF1YICsQ8TC8W6FDg8ueU/fYsODHF6pn3NYYwEttdFO1JFqtcFyIYasc57FED34SQDXQf1CqC2ry5VpNhTN+R5pw8cfByKwXog+qh46ZC+l75lkgqwKCXwemV5JbqO/tnwzR9s/ESj8orL8/zBOtP4+5HKBT+d84+cztqj74WXj9bScbfx60vf89cRZ0pF1tG2Gl5AW73m7eb8NsQdPAajDU+NtqdkTymtjIhaLOWWf+YdMzh1DEbiQ2VJVjYVkVrV6Hk1MUuyHP6L05slcNtnRN+RP1xb4XTx1Q+WpRJBsUgxJ+rHg5gbNxmzMGsysoK8RDQKeDKJHos6l6dGipln5PawlBzbRJ6UWEWeWLaLPDOxjDyzMBR5TjQYeXo6YHmiJnQ+GYZVOJJlZczIx6rBR2tWc
pgzT9sW2KQV0uPBm+XRAsPTQZxnFvLYYM7S54X2RVGJHjN52S8/dK/8gFwWmJsZ3BILU6ObE14UAl5Xanzj/Mz88uzYAOcvfs+IFjcLmuE8OzbFeeuJOY4KiIIQ2eAGxEDgq7GHrcW8zHRDXdjH86wXiTUtS2JJ5IqiON4hUiUTDymgtzUz3NFMyQHYKbwFwtaQ6uHbpbqoKWIR2qVTBPkvNNWGQjErkFcQPFOFPokSy1YxZV1sPgvYKjb4sY3tPjYqOlXQfIRip2o3aj28/IX9TK3Q9tgProR5F9YlWucze6l/j2BXIdglpG6YXK5JcZR8VH3PG66dqqABCACwFWsAGWCjBmhdA9TeCCfXpCnWAPJ6ML4jQReCXqUEKfDVM4+MQLtR1bpkI3CH4R4qijTm6256Hu6iVMdoF0a7MNqF0a4aYoDRLox2nXi0i1J9R5Srfy5V9zvlRxXWof93tWZelet4c/22Ilg95s3yEj5VeiATzEUoVon4tGF8aojLL0ntHfNAXao3zY/lmEbFRQMIohBEDRBEIYjK6kIQhSAKFl4EUQii2gZRnR+aM4a2fwsgClgRUMZQJwOXoHNw1MKlccMHiyNcqgiXau9gAepSvIllDJ7PWAUunbEBf/14jqCpkkVF0ISgCUETgqZDPEfQhKAJEjwETZVAE+1809p4940iO08TrLZE8pTmnobADKJiMIUH93QCpoa1dwMDdSneEDzefxohzj1J7gfCKIRRCKMQRiGM0hFGIYzqC4wadr5N1YRg1Dvc7491uyYVd/poxvCXmtekijdssrpauyfV+zF3v9xdvj5Ovo+/f/7buQ///AlelYdAqlmBEUdw/TtviCAtFeejGN2tbaHYihdYV+8wSWa7K/erXJ59iHvQqOTqKLmtS+5IuCuo9mRqdud3y5IrdpiMD0iu2K+xAsmFb93jFw6Xd2qWU/pwdRQVyKV0LwFIS/AA9LuJSMpSgm6MOqekuF24e0rKIdUz7Yij2tL9cZohMQCKaYvnOjRGfzk8eXbUJy4Me0Z/eTvLUe8oEi1l5/QHbgGscqMiLT6Meh+pT6MmRxGqf4kqoHEe+3XDo7KcEUbEqZ15kSvzweJZkx3xrGK6EM+K7HdHAa3C+WuJkPU1viVCDJWLBGBF9IabnA+usJZ3qRqjI7YqVAx1qLx/FOYmXhvQQYSrbpxAqqi9FdewtMheYBVs3gdEKWLz7hVp2qFG73WOSH1y9zqLehVagqVWr2bfAfWqOr1a+15nsaIWF1/B0qLX0qsI7hDcIbj7wOBOdEqUrl2ANVGFfbP10R3RJ0fshRiT3nkhuKtW/Sxw7VXgUkWqvRB45nX/bCFJr7Hv0WxhD9RoxZ00b8J2EalPDttJWhU4W0KxVsXtNeq1am1sJ1XU3tUDsLSAG2sOaFVEdojsENl9YGQnuiSZ1unOJcEDSRVEJGmZ7fXXMovrr8Qpo6bWMlO4nVZXeKZtoii26UCNBVFsbNJRxLdNLU4WO6xisTHdc9h2hryulsx68J7+EdgzZxpyQ65d+68ce13JxSuuyxIEPt3mM926DjNVgX7YTj3ERu33hyzBmj7NI1PHgCWrxk6hX+S+3U4EgW7AzhEhgmlA06ojQMrFmevG7BzdA73fw9EKkzFHwVFx/1YPOLpnxdF7OApBsPI5B0fH3JHWIXMvp5v/XWr+im60T19mtz//8/fzAtxh9+6L+WKDvr4YGNdpkEy/NgbGbXJVX/zngRv5O8td20ccPhOPp5movLsT5LiOzl/rzp8IP8260TOpovaiZ6CwgLf2VTmVhrE1Hv750O95EO1Iwmf8Fx5QgwfU7JcfPKBGxwNq4gw8oKaPMwMf64CawsRAX2cExDCaqXJGAPStoMVBAvT6045x7klfj2CqvOwc5BQUpETI1DBkMswS14lW+65zqab27joHxQU89aEiaNoVAiunprAKMdUBg4uYCjEVYirEVId4jpgKMRUkeIip3oKpck+rM1AFr
bJ693xWcotCMnt11LNVPUNewLE7zfHzQk/W858OR4mm8mZ2mKXylHMRHmni5QYd6zizgoozldJPfwP99IseUhCYAldLQWiTJcZzmo3nEFO6U4XUXY0L1JUJkaKYDqkQrUXHorIZIkD0XbEZ2h+l67sZIoBrplSJgsfnoBJtXIkKbK+7ilyuqb2zq2B5wRN5OpCX2pMock2KJ1FS8UB5aXPSTdjzQWpvmJJqyix8w9tUpIaELVPSC2PjvS8Quv8F+buXX3j31hl4gOxfm1l2YOzOHZgRFYnU4ikRgySeWiB4PhFJP/0f \ No newline at end of file diff --git a/data/YOLOv3.png b/data/YOLOv3.png new file mode 100644 index 0000000..58a230d Binary files /dev/null and b/data/YOLOv3.png differ diff --git a/data/YOLOv3.svg b/data/YOLOv3.svg new file mode 100644 index 0000000..f0e25ae --- /dev/null +++ b/data/YOLOv3.svg @@ -0,0 +1 @@ +
YOLOv3
(width=416, height=416, depth=3)
YOLOv3...
Darknet-53
Darknet-53
w × h × d
w × h × d
w/32 × h/32 × 1024
w/32 × h/32 × 1024
YoloConv
(512)
YoloConv...
w/32 × h/32 × 512
w/32 × h/32 × 512
YoloOutput
(512, 3, classes=1)
YoloOutput...
YoloConv
(filters)
YoloConv...
DarknetConv
(filters, 1)
DarknetConv...
w × h × d
w × h × d
DarknetConv
(filters*2, 3)
DarknetConv...
w × h × f
w × h × f
w × h × 2*f
w × h × 2*f
YoloOutput
(filters, anchors, classes)
YoloOutput...
DarknetConv
(filters*2, 3)
DarknetConv...
w × h × d
w × h × d
w × h × 2*%3CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphMof
w × h × 2*%3CmxGra...
YoloConvConcatenate
(filters)
YoloConvConcatenate...
DarknetConv
(filters, 1)
DarknetConv...
w/2 × h/2 × d
w/2 × h/2 × d
w/2 × h/2 × f332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w/2 × h/2 × f332Cm...
Upsampling2D
(2)
Upsampling2D...
w × h × f332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w × h × f332CmxGra...
Concatenate
Concatenate
w × h × (e+f)332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w × h × (e+f)332Cm...
YoloConv
(filters)
YoloConv...
w × h × f332CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%332
w × h × f332CmxGra...
w/16 × h/16 × 512
w/16 × h/16 × 512
w/8 × h/8 × 256
w/8 × h/8 × 256
(0)
(0)
(1)
(1)
(2)
(2)
w/32 × h/32 × 3 × 6%3CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22w%2F32%20%C3%97%20h%2F32%20%C3%97%20512%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2280%22%20y%3D%22280%22%20width%3D%22110%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphModel%3
w/32 × h/32 × 3 ×...
YoloConvConcatenate
(256)
YoloConvConcatenate...
w/16 × h/16 × 256
w/16 × h/16 × 256
YoloOutput
(256, 3, classes=1)
YoloOutput...
w/16 × h/16 × 3 × 6%3CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22w%2F32%20%C3%97%20h%2F32%20%C3%97%20512%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2280%22%20y%3D%22280%22%20width%3D%22110%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphModel%3
w/16 × h/16 × 3 ×...
YoloConvConcatenate
(128)
YoloConvConcatenate...
w/8 × h/8 × 128
w/8 × h/8 × 128
YoloOutput
(128, 3, classes=1)
YoloOutput...
w/8 × h/8 × 3 × 6%3CmxGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22w%2F32%20%C3%97%20h%2F32%20%C3%97%20512%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2280%22%20y%3D%22280%22%20width%3D%22110%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphModel%3
w/8 × h/8 × 3 × 6%...

Amount Predicted Boxes:
w/32 × h/32 × 3
Amount Predicted Boxes:...

Amount Predicted Boxes:
w/16 × h/16 × 3
Amount Predicted Boxes:...

Amount Predicted Boxes:
w/8 × h/8 × 3
Amount Predicted Boxes:...
DarknetConv
(anchors*[classes+5], 1, bn=False)
DarknetConv...
w × h × anc*[cls+5]xGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphMo
w × h × anc*[cls+5...
Reshape
Reshape
w × h × anc × cls+5xGraphModel%3E%3Croot%3E%3CmxCell%20id%3D%220%22%2F%3E%3CmxCell%20id%3D%221%22%20parent%3D%220%22%2F%3E%3CmxCell%20id%3D%222%22%20value%3D%22%26lt%3Bb%26gt%3BBatchNorm%26lt%3Bbr%26gt%3B%26lt%3B%2Fb%26gt%3B%22%20style%3D%22text%3Bhtml%3D1%3BstrokeColor%3Dnone%3BfillColor%3Dnone%3Balign%3Dcenter%3BverticalAlign%3Dmiddle%3BwhiteSpace%3Dwrap%3Brounded%3D0%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2240%22%20y%3D%22200%22%20width%3D%22320%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%223%22%20value%3D%22%22%20style%3D%22html%3D1%3BendArrow%3Dnone%3BendFill%3D0%3BstrokeColor%3D%23000000%3BstartArrow%3Dblock%3BstrokeWidth%3D1%3BanchorPointDirection%3D1%3Bcomic%3D0%3Bjiggle%3D10%3B%22%20edge%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20width%3D%2250%22%20height%3D%2250%22%20relative%3D%221%22%20as%3D%22geometry%22%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22270%22%20as%3D%22sourcePoint%22%2F%3E%3CmxPoint%20x%3D%22200%22%20y%3D%22240%22%20as%3D%22targetPoint%22%2F%3E%3C%2FmxGeometry%3E%3C%2FmxCell%3E%3CmxCell%20id%3D%224%22%20value%3D%22%22%20style%3D%22rounded%3D1%3BwhiteSpace%3Dwrap%3Bhtml%3D1%3B%22%20vertex%3D%221%22%20parent%3D%221%22%3E%3CmxGeometry%20x%3D%2290%22%20y%3D%22240%22%20width%3D%2290%22%20height%3D%2230%22%20as%3D%22geometry%22%2F%3E%3C%2FmxCell%3E%3C%2Froot%3E%3C%2FmxGraphMo
w × h × anc × cls+...
DarknetConv
(filters, 1)
DarknetConv...
DarknetConv
(filters*2, 3)
DarknetConv...
w × h × f
w × h × f
w × h × 2*f
w × h × 2*f
DarknetConv
(filters, 1)
DarknetConv...
w × h × f
w × h × f
w × h × e
w × h × e
Viewer does not support full SVG 1.1
\ No newline at end of file diff --git a/data/bounding-box.png b/data/bounding-box.png new file mode 100644 index 0000000..eb11c66 Binary files /dev/null and b/data/bounding-box.png differ diff --git a/data/docker-architecture.png b/data/docker-architecture.png new file mode 100644 index 0000000..f4af56a Binary files /dev/null and b/data/docker-architecture.png differ diff --git a/data/fu_logo.png b/data/fu_logo.png new file mode 100644 index 0000000..992821f Binary files /dev/null and b/data/fu_logo.png differ diff --git a/data/safari.png b/data/safari.png new file mode 100644 index 0000000..289e2d0 Binary files /dev/null and b/data/safari.png differ diff --git a/data/sedric.jpeg b/data/sedric.jpeg new file mode 100644 index 0000000..830b3a9 Binary files /dev/null and b/data/sedric.jpeg differ diff --git a/data/traffic_light_on_plane.png b/data/traffic_light_on_plane.png new file mode 100644 index 0000000..2d9594f Binary files /dev/null and b/data/traffic_light_on_plane.png differ diff --git a/data/yolov1-bounding.png b/data/yolov1-bounding.png new file mode 100644 index 0000000..a57cdcc Binary files /dev/null and b/data/yolov1-bounding.png differ diff --git a/data/yolov1.png b/data/yolov1.png new file mode 100644 index 0000000..fbb7ae9 Binary files /dev/null and b/data/yolov1.png differ diff --git a/out/.gitkeep b/out/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/01-title.tex b/src/01-title.tex new file mode 100644 index 0000000..78295db --- /dev/null +++ b/src/01-title.tex @@ -0,0 +1,20 @@ +\title{\topic} +\author{ +\studentName \\ +\texttt{Student ID: \matrikel} +} +\subject{\type} +\publishers{ +\begin{tabular}{l l} + \textbf{\normalsize{Year of Study:}} & \normalsize{\jahrgang} \tabularnewline + \textbf{\normalsize{Faculty:}} & \normalsize{} \tabularnewline + \textbf{\normalsize{Study Course:}} & \normalsize{Computer Science} \tabularnewline + \textbf{\normalsize{First Examiner:}} & \normalsize{\erstBetreuer} \tabularnewline + 
\textbf{\normalsize{Second Examiner:}} & \normalsize{\zweitBetreuer} \tabularnewline +\end{tabular} +} +\titlehead{ +\hspace{15mm} +\includegraphics[scale=0.1]{../data/fu_logo.png} +} +\maketitle diff --git a/src/02-abstract.bib b/src/02-abstract.bib new file mode 100644 index 0000000..af5f9a7 --- /dev/null +++ b/src/02-abstract.bib @@ -0,0 +1,54 @@ +% Abstract +@book{maurer2015autonomes, + title={Autonomes Fahren}, + author={Maurer, Markus and Gerdes, J Christian and Lenz, Barbara and Winner, Hermann}, + year={2015}, + publisher={Springer Berlin Heidelberg}, + pages={176\pagehyphen177} +} + +@misc{berlkoenig, + title = {Berlkoenig {\textbar} {Die} {Idee}}, + url = {https://www.berlkoenig.de/die-idee}, + abstract = {Der BerlKönig chauffiert dich sicher und zuverlässig durch Berlin. Einfach per App bestellen, bargeldlos zahlen \& los fahren.}, + urldate = {2020-06-03} +} + +@article{10.1257/aer.p20161002, + Author = {Cramer, Judd and Krueger, Alan B.}, + Title = {Disruptive Change in the Taxi Business: The Case of Uber}, + Journal = {American Economic Review}, + Volume = {106}, + Number = {5}, + Year = {2016}, + Month = {May}, + Pages = {177-82}, + DOI = {10.1257/aer.p20161002}, + URL = {https://www.aeaweb.org/articles?id=10.1257/aer.p20161002} +} + +@misc{Herger_2018, + title={Waymo: 14,4 Millionen Kilometer und ein Technologievorsprung von Jahren}, + url={https://derletztefuehrerscheinneuling.com/2018/09/02/waymo-144-millionen-kilometer-und-technologievorsprung-von-jahren/}, + abstractNote={Ein neuer Monat, ein neuer Meilenstein für Waymo, Googles Selbstfahrtechnologie-Gruppe. 
Neun Millionen Meilen (14,4 Millionen Kilometer) in autonomen Modus haben die Autos nun erreicht, bei einer a…}, + journal={Der letzte Führerscheinneuling...}, + author={Herger, Mario}, + year={2018}, + month={Sep} +} + +@article{glancy2015autonomous, + title={Autonomous and automated and connected cars-oh my: first generation autonomous cars in the legal ecosystem}, + author={Glancy, Dorothy J}, + journal={Minn. JL Sci. \& Tech.}, + volume={16}, + pages={684}, + year={2015}, + publisher={HeinOnline} +} + +@misc{safari, + title = {SAFARI – Verkehrspolitik: Forschungs- und Entwicklungsprojekte / Land Berlin}, + url={https://www.berlin.de/senuvk/verkehr/politik_planung/projekte/safari/index.shtml}, + abstractNote={Senatsverwaltung für Umwelt, Verkehr und Klimaschutz in Berlin – Verkehrspolitik / Forschungs- und Entwicklungsprojekte: SAFARI – Sicheres automatisiertes und vernetztes Fahren auf dem Digitalen Testfeld Stadtverkehr in Berlin Reinickendorf} +} diff --git a/src/02-abstract.tex b/src/02-abstract.tex new file mode 100644 index 0000000..9d9c212 --- /dev/null +++ b/src/02-abstract.tex @@ -0,0 +1,49 @@ +\section*{Abstract} +\addcontentsline{toc}{chapter}{Abstract} + +Autonomous driving is becoming increasingly important in the 21st century. +It offers the possibility to complement existing mobility concepts and to enable new groups of people to move independently, such as people with limited mobility.\cite{maurer2015autonomes} +In recent years, many new and innovative ideas have been established, bringing the boundaries between local public transport and individual transport ever closer. + +The BVG (Berliner Verkehrsgesellschaft) in Berlin, for example, offers the BerlKönig service, which functions as a mix of on-call bus and cab. +When ordering a ride, specify the start and destination points and the number of people to get transported. +The system then bundles similar trips together so that the called BerlKönig bus can run several tours simultaneously. 
+That reduces both the fare compared to a cab ride and is better for the environment.\cite{berlkoenig} +Uber modernizes the cab industry and manages to use a higher capacity utilization of existing resources. +Thus, Uber's drivers have a significantly increased workload and fewer empty runs.\cite{10.1257/aer.p20161002} +The quoted article points out that these improvements to detect outdated and inefficient regulations for the cab industry that do not apply to Uber, but also to better driver coordination with \q{more efficient driver-passenger matching technology based on mobile Internet technology}. + +The route planning for the drivers is usually already predefined by the system and is controlled by the system by suggesting possible connecting tours. +In the case of BerlKönig, the driver no longer has any room for maneuver. +Both concepts presented can be further optimized by autonomous vehicles. + +Safety plays a unique role in the development of autonomous cars. +In theory, autonomous vehicles from Waymo, a subsidiary of the Alphabet Group, to which Google also belongs, have been driving since 2012, have covered more than 14 million kilometers in the period up to 2018, and in autonomous mode have caused only one accident themselves.\cite{Herger_2018} +In practice, many questions are still unanswered. +Sensors and vehicle communication can be disturbed, legal liability issues are still unresolved, and the technical aspects, including implementation, are still partially unresolved.\cite{glancy2015autonomous} +Because of these many problems, Waymo cars only drive on specific routes in simple neighborhoods with little traffic in California's sunny Mountain View. + +Concerning safety, the detection of traffic light signals in road traffic, in the following called traffic lights, is an essential part of autonomous driving. 
+Previous technology used at the Free University of Berlin to detect traffic lights is based on markings in maps in which the car searches for traffic lights using image processing methods. +Furthermore, as part of a project carried out in the past two years in a test area in Berlin in cooperation with the Senate Department for Environment, Traffic and Climate Protection, and the Fraunhofer Institute FOKUS, who have extended traffic light systems at intersections with transmitter masts. +These indicate not only the general condition of the crossings (including the layout of the turning lanes and road works) but also the current status of the traffic lights.\cite{safari} + +\begin{figure}[!htbp] + \vspace{0cm} + \minipage{0.0\textwidth} + \endminipage\hfill + \minipage{0.85\textwidth} + \includegraphics[width=\linewidth]{../data/safari.png} + \caption{Funktionsweise von SAFARI} %Bildunterschrift + \label{fig:safari} %fig:ID + \endminipage\hfill + \minipage{0.0\textwidth} + \endminipage +\end{figure} + +This method of detecting traffic lights works very well on test routes or routes that have been upgraded for this purpose, but not in unknown terrain. +The systems fail at temporary traffic lights at road works and in other countries that are not equipped with such a system. +In many situations, such retrofitting of existing traffic lights is not economically viable, especially since all traffic lights would have to be retrofitted for the use of autonomous cars without a transitional period. + +In this thesis, a different approach to traffic light detection shall be applied. +Here, the hardware of the cameras and graphic units already installed in the autonomous car shall be used to detect traffic lights in real-time utilizing a neural network. 
diff --git a/src/03-acronym.tex b/src/03-acronym.tex new file mode 100644 index 0000000..9156246 --- /dev/null +++ b/src/03-acronym.tex @@ -0,0 +1,34 @@ +{\let\cleardoublepage\relax +\newpage +\chapter*{List of Abbreviations}} +\addcontentsline{toc}{chapter}{Abkürzungsverzeichnis} +\begin{acronym}[SEPSEP] + \acro{API}{Application Programming Interface} + \acro{CCR}{Center for Clinical Research} + \acro{CDISC}{Clinical Data Interchange Standards Consortium} + \acro{CRF}{Case Report Form} + \acro{CRUD}{Create-Read-Update-Delete} + \acro{CSV}{Comma-separated values} + \acro{DDS}{Data Definition Sheet} + \acro{FDA}{Food and Drug Administration} + \acro{GUI}{grafische Benutzeroberfläche} + \acro{HTML}{Hypertext Markup Language} + \acro{HTTPS}{Hypertext Transfer Protocol Secure} + \acro{HTTP}{Hypertext Transfer Protocol} + \acro{IT}{Informationstechnik} + \acro{JSON}{JavaScript Object Notation} + \acro{JS}{JavaScript} + \acro{LDAP}{Lightweight Directory Access Protocol} + \acro{MDD}{Medical Device Directive} + \acro{PDF}{Portable Document Format} + \acro{RDBMS}{Relationales Datenbankmanagementsystem} + \acro{REST}{Representational State Transfer} + \acro{SAS}{Statistical Analysis System} + \acro{SOAP}{Simple Object Access Protocol} + \acro{SQL}{Structured Query Language} + \acro{URI}{Uniform Resource Identifier} + \acro{URL}{Uniform Resource Locator} +\end{acronym} +% Referenz mit \acs{edc} - Kurzfassung erzwingen +% \acl{edc} - Langfassung erzwingen +% \ac{edc} - Automatisch diff --git a/src/04-a21-implementation-init.bib b/src/04-a21-implementation-init.bib new file mode 100644 index 0000000..ea68a99 --- /dev/null +++ b/src/04-a21-implementation-init.bib @@ -0,0 +1,70 @@ +@book{Redmon_2020, + title = {pjreddie/darknet}, + author = {Redmon, Joseph}, + year = 2020, + month = oct, + url = {https://github.com/pjreddie/darknet}, + abstractnote = {Convolutional Neural Networks. 
Contribute to pjreddie/darknet development by creating an account on GitHub.} +} +@book{Caesar2011_2020, + title = {Caesar2011/yolov3-tf2}, + author = {Caesar2011}, + year = 2020, + month = oct, + url = {https://github.com/Caesar2011/yolov3-tf2}, + abstractnote = {YoloV3 Implemented in Tensorflow 2.0. Contribute to Caesar2011/yolov3-tf2 development by creating an account on GitHub.} +} +@misc{why_docker_2020, + title = {Why Docker? 2020}, + year = 2020, + month = oct, + url = {https://www.docker.com/why-docker}, + abstractnote = {Learn why Docker is the leading container platform — Freedom of app choice, agile operations and integrated container security for legacy and cloud-native applications.} +} + +@article{7036275, + title = {Containers and Cloud: From LXC to Docker to Kubernetes}, + author = {D. {Bernstein}}, + year = 2014, + journal = {IEEE Cloud Computing}, + volume = 1, + number = 3, + pages = {81--84} +} + +@misc{yoloWebsite, + title = {YOLO: Real-Time Object Detection}, + url = {https://pjreddie.com/darknet/yolo/}, + abstractnote = {You only look once (YOLO) is a state-of-the-art, real-time object detection system.} +} + +@inproceedings{Mao_2019_ICCV, + title = {A Delay Metric for Video Object Detection: What Average Precision Fails to Tell}, + author = {Mao, Huizi and Yang, Xiaodong and Dally, William J.}, + year = 2019, + month = oct, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)} +} + +@article{doi:10.1021/ci0342472, + title = {The Problem of Overfitting}, + author = {Hawkins, Douglas M.}, + year = 2004, + journal = {Journal of Chemical Information and Computer Sciences}, + volume = 44, + number = 1, + pages = {1--12}, + doi = {10.1021/ci0342472}, + url = {https://doi.org/10.1021/ci0342472}, + note = {PMID: 14741005}, + eprint = {https://doi.org/10.1021/ci0342472} +} + +@misc{docker_replacing_vm, + title="Are Containers Replacing Virtual Machines?", + 
url={https://www.docker.com/blog/containers-replacing-virtual-machines/}, + abstractNote={Learn from Docker experts to simplify and advance your app development and management with Docker. Stay up to date on Docker events and new version announcements!}, + journal={Docker Blog}, + year={2018}, + month={Aug} +} diff --git a/src/04-a21-implementation-init.tex b/src/04-a21-implementation-init.tex new file mode 100644 index 0000000..d6d8031 --- /dev/null +++ b/src/04-a21-implementation-init.tex @@ -0,0 +1,120 @@ +Joseph Redmon, developer of YOLO and worker at the Massachusetts Institute of Technology (MIT), built on a self-engineered architecture named Darknet. +Darknet is comparable with Tensorflow with a reduced feature space but optimized on running YOLO. He trained the reference implementation of YOLOv3 programmed based on Darknet on 80 different classes.\cite{Redmon_2020} +The class list already contains many classes related to autonomous driving and traffic, for instance, car, person, or traffic sign. +It also included the class traffic light. + +On running the network with the Cityscape dataset with a first visual approach, a low level of precision becomes observed. +That is due to the recognition of several traffic signs as traffic lights. +Both classes are very similar to each other. +Traffic lights and traffic signs are mounted on a mast and are tiny objects challenging to spot with YOLO. The developers of the reference implementation fed images of different input size to the network during training. +As previously mentioned, the network is translation invariant due to the lack of fully connected layers. +Training on different input sizes makes the network also robust against scaling. + +\section{Used Software and Hardware}\label{sec:used-software-and-hardware} + +The foundation with this approach provides reasonably good results, but further improvements remain required. +A version of YOLOv3 transposed to Tensorflow is used to develop the network further. 
+Tensorflow offers a more straightforward interface for quick modifications. +The fork with all described changes is available on Github.\cite{Caesar2011_2020} + +Docker is an open-source software to containerize software. +An isolated user space, filesystem, and network allow the software to run independently without interference. +The installation process is fast and portable to share it with others due to a pre-determined build script.\cite{why_docker_2020} +Systems based on virtual machines utilize a hypervisor to abstract a so-called guest operation system on which each application runs on. +In contrast to a virtual machine, Docker containers make use of the operating system of the host machine. +That increases performance and reduces the overhead size used by each application.\cite{7036275} + +\begin{figure}[!htbp] + \vspace{0cm} + \minipage{0.0\textwidth} + \endminipage\hfill + \minipage{1\textwidth} + \includegraphics[width=\linewidth]{../data/docker-architecture.png} + \caption{Docker Containers Versus Multiple Virtual Machines\cite{docker_replacing_vm}} %Bildunterschrift + \label{fig:docker-architecture} %fig:ID + \endminipage\hfill + \minipage{0.0\textwidth} + \endminipage +\end{figure} + +A Docker container, based on the official Tensorflow image, bundles Tensorflow, OpenCV, and the forked YOLOv3 repository together to enable a fast workflow. + +The test results refer to the use of a GeForce RTX 2080 Ti with 11GB of GDDR6 memory. +The graphic card achieves roughly 13.4 TFLOPs. + +\subsection{YOLO in Tensorflow}\label{subsec:yolo-in-tensorflow} + +The network transposed to Tensorflow includes an implementation of YOLOv3 and YOLOv3-Tiny. +The command-line interface (CLI) can also convert weightings of networks, which are available in the file format of MIT's Darknet, into a Tensorflow checkpoint. +For this purpose, weight specification and the number of classes on which those weights are trained is necessary. 
+ +For testing, only the Cityscapes dataset is used for the time being. +As expected, the mAP is identical to the Darknet implementation. +However, it can be noted that Tensorflow is slower than the Darknet implementation. +Darknet is optimized to run YOLO. For better comparability of the results, the speed of the Tensorflow implementation is taken as reference. + +When training with the Tensorflow implementation, it is not possible to vary the image size during training. +Contrarily the network offers the same range of functions. + +One can expect that optimizing the network for the particular needs of traffic light detection can further increase the median average precision. +The already trained network is trained on 80 classes and, therefore, not specialized. +Although this provides a broad data basis, because the reference weightings are trained on the COCO dataset\cite{yoloWebsite}, and reduce susceptibility to failure, there is a risk of confusion with other classes. +As already mentioned, YOLOv3 is context-sensitive due to the convolutional layer. +Both traffic lights and traffic signs appear on traffic masts at about three meters height. +At this spot, the activation for both classes is relatively high. +The result can be a traffic sign incorrectly recognized as a traffic light (leads to less precision) or a traffic sign incorrectly recognized as a traffic light (leads to less recall). +Confusion with other classes is also possible, but the probability decreases with increasing differences between the classes. + +If the training process only includes one class, the anchor boxes may also specialize. +Traffic lights are not horizontal in Germany. +If the network is trained for only one class, the anchor box for horizontal objects is less active or not active altogether. +The expectation is that recognizing a traffic light as a square decreases, and the location error, which is comparatively high in YOLOv3\cite{DBLP:conf/cvpr/RedmonF17}, decreases. 
+ +\subsubsection{Evaluation of the Reference Weights}\label{subsubsec:evaluation-of-the-reference-weights} + +The Mean average precision (mAP) is a performance indicator for determining the quality of a classification network. +The calculation is relatively complex. +The metric AP indicates how high the precision of a class on an average across all images is. +The mAP is the average precision covering all trained or relevant classes. +In the application case of traffic light recognition, only the detection of the class traffic light is relevant. +The mAP is identical to the AP for a single class. +The mAP has, over time, also established itself as an indicator for object recognition networks such as YOLO. There is no generally valid definition for these networks, and different datasets like VOC or COCO implement mAP differently. +In principle, a bounding box is considered correct if it has an IoU with a target box of more than 0.5. Differently used IoUs are also possible. +This thesis uses the definition of the mAP of COCO\@. + +However, the metric mAP has some weak points that the indicator does not express. +Videos have an additional time component. +The metric does not reflect the detection robustness of a bounding box. +The requirement does not yet include tracking and matching a bounding box over several frames, but whether, for instance, it is only detected in every second frame and "flickers".\cite{Mao_2019_ICCV} +Furthermore, the mAP can only provide accuracy compared to the annotated training data. +If these are inaccurate or have a bias, the metric will not reflect it. +The metric is suitable as a comparison by using the same dataset and visualization. + +TABLE + +The reference weightings achieve an AP\textsuperscript{IoU=0.5} of 0.35 in the class traffic lights, significantly below the average across all classes of 0.70. +A possible reason for this can again be the size. 
+An absolute translation of a few pixels in a small bounding box results in a much faster declining IoU than in large objects. + +The tiny variant is about 3.5 times faster than the large network. + However, the average precision is only approximately two thirds as high. + The resulting annotations also clearly show the difference: Many traffic lights are not recognized or only inaccurately. + +When viewing the annotated images, an additional problem becomes apparent. +In the COCO triangulation data set, the traffic light class includes all traffic lights, including those that are not directed at the vehicle from the front or are only indirectly relevant, such as pedestrian lights. +In addition, small yellow lights, such as those mounted on construction site pillars, also seem to belong to the class. +That is technically correct but not desirable. +This form is an implication of human bias. +The Cityscapes dataset mainly contains facing traffic lights. +DriveU distinguishes between the poses of traffic lights, including "frontal". + +\subsubsection{Investigation of Different Partial Aspects}\label{subsubsec:investigation-of-different-partial-aspects} + +With this background, the following tests base on a completely re-trained network with the Cityscapes dataset. +The default configuration includes early stopping. +If the accuracy of the validation dataset does not improve during the last three epochs, the training has reached a plateau and stops. +Continuing the training would promptly increase the accuracy of the training dataset to improve accuracy, but the accuracy of the test dataset would decrease. +Overfitting becomes effective.\cite{doi:10.1021/ci0342472} + +The size is set to 416x416 pixels to allow comparison with the imported reference weights. +Optimization of the size parameter also takes place in the course of development. 
diff --git a/src/04-content.tex b/src/04-content.tex new file mode 100644 index 0000000..32fa737 --- /dev/null +++ b/src/04-content.tex @@ -0,0 +1,13 @@ +\chapter{Fundamentals}\label{ch:fundamentals} + +\input{04a01-requirement-analysis} + +\newpage +\input{04a02-yolo} + +\newpage +\input{04a03-datasets} + +\chapter{Implementation}\label{ch:implementation} + +\input{04-a21-implementation-init} diff --git a/src/04a01-requirement-analysis.bib b/src/04a01-requirement-analysis.bib new file mode 100644 index 0000000..7cd5888 --- /dev/null +++ b/src/04a01-requirement-analysis.bib @@ -0,0 +1,89 @@ +@inproceedings{DBLP:conf/seuh/Weinmann03, + author = {Ulrich Weinmann}, + editor = {Johannes Siedersleben and + Debora Weber{-}Wulff}, + title = {Software im Automobil - Anforderungen und Chancen}, + booktitle = {Software Engineering im Unterricht der Hochschulen, {SEUH} 8, Berlin + 2003}, + pages = {1--7}, + publisher = {dpunkt}, + year = {2003}, + timestamp = {Mon, 10 May 2004 13:39:09 +0200}, + biburl = {https://dblp.org/rec/conf/seuh/Weinmann03.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@misc{kraftfahrtBundesamt, + title={Kraftfahrt-Bundesamt – 1. Januar 2019 – Bestand in den Jahren 2010 bis 2019 nach ausgewählten Fahrzeugklassen mit dem Durchschnittsalter der Fahrzeuge}, + url={https://www.kba.de/DE/Statistik/Fahrzeuge/Bestand/Fahrzeugalter/2019/2019_b_alter_kfz_z.html?nn=2731444}, + publisher={Kraftfahrt-Bundesamt} +} + +@article{Dierig_2020, + title={Landwirte: In Deutschland bleibt der autonome Traktor ein Traum}, + url={https://www.welt.de/wirtschaft/article205039236/Landwirte-In-Deutschland-bleibt-der-autonome-Traktor-ein-Traum.html}, + abstractNote={Längst müssen Bauern in vielen Ländern beim Ernten nicht mehr hinterm Steuer sitzen. Die Arbeit erledigen autonome Maschinen. Doch in Deutschland befürchtet der Staat Todesopfer auf dem Acker. 
Die Bundesrepublik ist dabei, den Anschluss zu verlieren.}, + journal={DIE WELT}, + author={Dierig, Carsten}, + year={2020}, + month={Jan} +} + +@misc{smartFarming, + title={Was ist Smart Farming?}, + url={https://www.smart-akis.com/index.php/de/netzwerk/was-ist-smart-farming/}, + abstractNote={Smart Farming repräsentiert die Anwendung von modernen Informations- und Kommunikationstechnologien (ICT) in der Landwirtschaft. Einige Quellen nennen dies die „Dritte Grüne Revolution“. Nach der Pflanzenzüchtung und der Gentechnik beeinflusst diese “Dritte Grüne Revolution” die landwirtschaftliche Welt über die Kombination von ICT Lösungen, das „Internet der Dinge“ (IoT), Sensoren und Aktoren, Geo-Positionierungssysteme, Big Data, unbemannte...}, + journal={Smart-AKIS} +} + +@misc{Sedric, + title={Sedric}, + url={https://www.horizont.net/tech/nachrichten/Sedric-Volkswagen-zeigt-erstes-Konzep-Auto-fuer-autonomes-Fahren-156373}, + abstractNote={Der VW-Konzern leitet nach dem Abgas-Skandal die Wende mit einem Zukunftsprogramm ein. 
Am Vorabend der Automobilmesse in Genf präsentierte der Konzern sein erstes Konzept eines selbstfahrenden Autos, das keinen menschlichen Fahrer mehr braucht.}, journal={https://www.horizont.net} } + +@misc{lidarCarResults, + title= {lidar car – Google Scholar}, + url={https://scholar.google.de/scholar?as_ylo=2019&q=lidar+car}, + journal={libar car – Google Scholar} +} + +@misc{sicherheitsbedenken, + title= {Erneut Tesla ausgetrickst | Autonomes Fahren \& Co}, + url={https://www.autonomes-fahren.de/sicherheitsbedenken-gegen-tesla-von-mcafee/}, + abstractNote={McAfee hat einen Tesla mit einem Stück Klebeband manipuliert.} +} + +@book{teslaSecurity, + title={Experimental Security Research of Tesla Autopilot\_2019}, + url={https://keenlab.tencent.com/en/whitepapers/Experimental_Security_Research_of_Tesla_Autopilot.pdf}, + publisher={Tencent Keen Security Lab}, + year={2019}, + month={Mar} +} + +@misc{elonMusk, + title = {YouTube - Elon Musk on Cameras vs LiDAR for Self Driving and Autonomous Cars}, + url={https://www.youtube.com/watch?v=HM23sjhtk4Q}, + abstractNote={Elon Musk says lidar “is a fool’s errand” and that anyone using lidar is “doomed.” He goes on to say that they are “expensive sensors that are unnecessary. It’s like having a whole bunch of expensive appendices. Like, one appendix is bad, well now you have a whole bunch of them, it’s ridiculous, you’ll see.”}, + journal={Elon Musk on Cameras vs LiDAR for Self Driving and Autonomous Cars} +} + +@article{hecht2018lidar, + title={Lidar for self-driving cars}, + author={Hecht, Jeff}, + journal={Optics and Photonics News}, + volume={29}, + number={1}, + pages={26--33}, + year={2018}, + publisher={Optical Society of America} +} + +@inproceedings{rohling2001waveform, + title={Waveform design principles for automotive radar systems}, + author={Rohling, Hermann and Meinecke, M-M}, + booktitle={2001 CIE International Conference on Radar Proceedings (Cat No. 
01TH8559)}, + pages={1--4}, + year={2001}, + organization={IEEE} +} diff --git a/src/04a01-requirement-analysis.tex b/src/04a01-requirement-analysis.tex new file mode 100644 index 0000000..33af1e7 --- /dev/null +++ b/src/04a01-requirement-analysis.tex @@ -0,0 +1,88 @@ +\section{Requirement Analysis}\label{sec:requirement-analysis} +Components in autonomous vehicles must meet the unique requirements of the car. +Such applies to the mechanical, electrical, and software components.\cite{DBLP:conf/seuh/Weinmann03} + +\subsection{General Requirements for Software and Hardware in Cars}\label{subsec:general-requirements-for-software-and-hardware-in-cars} + +\subsubsection{Durability}\label{subsubsec:durability} +The average age of passenger cars in Germany in 2010 was 8.1 years. +In 2019 it was already 9.5 years, and the trend is still rising. +For tractors, the average age in 2019 will even be almost 30 years on average.\cite{kraftfahrtBundesamt} +Accordingly, the additional hardware used for artificial intelligence should also be durable and long-lasting. +Furthermore, tractors, in particular, are already operating autonomously in many countries and are an \q{elementary component of smart farming}. +Smart Farming describes the use of modern communication and information technology in agriculture.\cite{smartFarming} +Cameras have already proven themselves in several industrial sectors and are also used in the automotive industry as rearview cameras and blind-spot assistants for trucks. + +\subsubsection{Portability}\label{subsubsec:portability} +Furthermore, the technology to be carried must have a reasonable degree of portability because laws and practical handling limit the space that vehicles may occupy. +However, autonomous driving gives rise to new possibilities for shaping the shape of the cars. +In the fourth and fifth steps of automation, the steering wheel and pedals can be omitted for the driver or concealed while driving in autonomous mode. 
+One concept for a new form factor is \q{Sedric} from Volkswagen.\cite{Sedric} +The LIDAR sensors (Light Detection and Ranging) on the roof, which seem to be indispensable for approaches of the various developers, should be emphasized. +Since 2019 alone, Google Scholar shows about 16,000 hits for the keyword \q{lidar car} (as of June 2020).\cite{lidarCarResults} + +\begin{figure}[!htbp] + \vspace{0cm} + \minipage{0.0\textwidth} + \endminipage\hfill + \minipage{0.85\textwidth} + \includegraphics[width=\linewidth]{../data/sedric.jpeg} + \caption{\q{Sedric} \textemdash a concept car from Volkswagen for autonomous driving\cite{Sedric}} %Bildunterschrift + \label{fig:sedric} %fig:ID + \endminipage\hfill + \minipage{0.0\textwidth} + \endminipage +\end{figure} + +\subsubsection{Safety}\label{subsubsec:safety} +The safety of vehicles has a high priority. +A safe vehicle prevents expensive repairs and saves the lives of outsiders and passengers. +Ultimately, this will also become a selling point, especially for autonomous cars. +After all, people entrust their lives to this machine. +Software built into it must be error-free, or at least exclude \q{false positives}, so that the vehicle does not accelerate unintentionally through manipulated traffic signs\cite{sicherheitsbedenken} or steer into oncoming traffic due to misleading points on the road\cite{teslaSecurity}. + +The calculations of the software that help to control the vehicle must be performed in real-time to ensure safety. +The latencies and thus the reaction speeds remain at a low level. + +\subsubsection{Production Costs}\label{subsubsec:production-costs} +Despite high safety requirements, the price of autonomous cars must be affordable. +That is one of the reasons why Tesla, for example, does not use costly LIDAR sensors. +According to Tesla boss Elon Musk, LIDAR sensors are too expensive, and all vision tasks are also accomplishable with cameras and low-cost RADAR (Radio Detection and Ranging) sensors. 
+Cameras provide significantly more information after resolving the three-dimensional perception, and humans also only use visible light for navigation.\cite{elonMusk} + +\vspace{1cm} +\begin{chapquote}{Elon Musk, 2019\cite{elonMusk}} + \q{Lidar is a fool’s errand, and anyone relying on lidar is doomed - expensive sensors that are unnecessary. It is like having a whole bunch of expensive appendices. Like, one appendix is bad, well now you have a whole bunch of them, it is ridiculous.} +\end{chapquote} + +\subsection{Requirements for Traffic Light Sensor Detection}\label{subsec:requirements-for-traffic-light-sensor-detection} +To achieve an autonomous vehicle of level 5, or already of autonomy level 3 when used in city traffic, the car must recognize traffic light heads correctly. +The current approach of traffic light detection with radio-controlled boxes at the roadside, as in the SAFARI project, works. +However, it has the decisive disadvantage that all traffic lights need a retrofit. +Temporary intersections, such as road works, might not be detected or deviate from the given plan due to flexible modifications. +Road traffic, as it exists today, is designed for visual contact. +Humans navigate by sight alone, except for acoustic support. + +Besides, this approach should not use LIDAR. These are, as already mentioned (see Production Costs), expensive and do not offer any particular added value since they work like cameras in the visible light spectrum or at the edge of it.\cite{hecht2018lidar} +Thus both have the same problems of particle penetration in snow, rain, dust, or hail. +RADAR does not have these problems, but due to its long wavelength of at least 4 mm, it can have a high inaccuracy - but a more extended range.\cite{rohling2001waveform} + +Another advantage of using cameras is the recognition of the color of traffic lights. +Visual recognition alone can already detect the state. +For these reasons, this approach is based entirely on cameras as sensors. 
+
+The images received by the camera can be processed either by conventional image processing methods or by neural networks.
+Traditional methods require a given algorithm to extract the required information.
+A detailed description by algorithms is difficult due to the many different shapes and positions that light signal heads can take on and the danger of confusion with other objects such as the upper brake lights of trucks.
+With neural networks, specifying such an algorithm manually is not necessary.
+Instead, a neural network can extract this information from the given annotated example data.
+There are already many large data sets available for the detection of objects in road traffic.\cite{DBLP:journals/corr/CordtsORREBFRS16,DBLP:conf/icra/FreginMKD18}
+Therefore, the use of such a network at this point makes sense.
+
+The YOLO neural network allows the segmentation of images into bounding boxes and classification of those with a precision of 57.9 mAP in 51 milliseconds (approximately 20 frames per second (FPS)) on a Titan X\@.\cite{DBLP:journals/corr/abs-1804-02767}
+In comparison, RetinaNet, a neural network with comparable functionalities, achieves the same precision in about four times the time.
+That makes YOLO more suitable for use in real-time environments, such as in autonomous vehicles.
+
+To ensure safety and correctly detect all traffic lights, the network is optimized for precision.
+For this purpose, the neural network is newly trained on custom classes.
+The implementation will be explained in the following chapters. 
diff --git a/src/04a02-yolo.bib b/src/04a02-yolo.bib new file mode 100644 index 0000000..16ffd45 --- /dev/null +++ b/src/04a02-yolo.bib @@ -0,0 +1,79 @@ +@article{DBLP:journals/corr/abs-1804-02767, + author = {Joseph Redmon and Ali Farhadi}, + title = {YOLOv3: An Incremental Improvement}, + journal = {CoRR}, + volume = {abs/1804.02767}, + year = {2018}, + url = {http://arxiv.org/abs/1804.02767}, + archivePrefix = {arXiv}, + eprint = {1804.02767} +} + +@article{DBLP:journals/corr/abs-2004-10934, + author = {Alexey Bochkovskiy and + Chien{-}Yao Wang and + Hong{-}Yuan Mark Liao}, + title = {YOLOv4: Optimal Speed and Accuracy of Object Detection}, + journal = {CoRR}, + volume = {abs/2004.10934}, + year = {2020} +} + +@phdthesis{DBLP:phd/hal/Cherti18, + author = {Mehdi Cherti}, + title = {Deep generative neural networks for novelty generation : a foundational + framework, metrics and experiments. (R{\'{e}}seaux profonds g{\'{e}}n{\'{e}}ratifs + pour la g{\'{e}}n{\'{e}}ration de nouveaut{\'{e}} : + fondations, m{\'{e}}triques et exp{\'{e}}riences)}, + school = {University of Paris-Saclay, France}, + year = {2018}, + pages = {117-120}, + url = {https://tel.archives-ouvertes.fr/tel-01838272}, + timestamp = {Tue, 31 Jul 2018 17:12:16 +0200}, + biburl = {https://dblp.org/rec/phd/hal/Cherti18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@misc{redmon2016look, + title={You Only Look Once: Unified, Real-Time Object Detection}, + author={Joseph Redmon and Santosh Divvala and Ross Girshick and Ali Farhadi}, + year={2016}, + eprint={1506.02640}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} + +@inproceedings{DBLP:conf/cvpr/RedmonF17, + author = {Joseph Redmon and + Ali Farhadi}, + title = {YOLO9000: Better, Faster, Stronger}, + booktitle = {{CVPR}}, + pages = {2}, + publisher = {{IEEE} Computer Society}, + year = {2017} +} + +@proceedings{DBLP:conf/eccv/2018-11, + editor = {Vittorio Ferrari and + Martial Hebert and + Cristian Sminchisescu 
and + Yair Weiss}, + title = {Computer Vision - {ECCV} 2018 - 15th European Conference, Munich, + Germany, September 8-14, 2018, Proceedings, Part {XI}}, + series = {Lecture Notes in Computer Science}, + volume = {11215}, + publisher = {Springer}, + year = {2018}, + pages = {416} +} + +@article{DBLP:journals/corr/HeZRS15, + author = {Kaiming He and + Xiangyu Zhang and + Shaoqing Ren and + Jian Sun}, + title = {Deep Residual Learning for Image Recognition}, + journal = {CoRR}, + volume = {abs/1512.03385}, + year = {2015} +} diff --git a/src/04a02-yolo.tex b/src/04a02-yolo.tex new file mode 100644 index 0000000..7648488 --- /dev/null +++ b/src/04a02-yolo.tex @@ -0,0 +1,281 @@ +\section{The Application of YOLOv3}\label{sec:the-application-of-yolov3} +YOLO is a fast convolutional network to detect objects in images and classify them at the same time. +Joseph Redmon and Ali Farhadi developed the network, and their latest version, YOLOv3, was released in 2018.\cite{DBLP:journals/corr/abs-1804-02767} +In spring 2020, Alexey Bochkovskiy released the fourth version, YOLOv4, with further improvements.\cite{DBLP:journals/corr/abs-2004-10934} +This iteration increases the mAP (Mean Average Precision) while maintaining the speed of the mesh. +Since YOLOv4 does not bring any additional notable enhancements for detecting small objects, YOLOv3 takes application in this work instead. +Further optimizations will be made after a more detailed analysis to specialize YOLOv3 for small items. + +To better understand the architecture of YOLOv3, the structure of the previous versions will be explained first, whereby the features that are still used in YOLOv3 will be discussed in detail. + +In the following, constants are defined as unchangeable values. +Parameters are freely selectable, but afterward, they cannot be changed. +In contrast, variables can be changed at any time, even after the training of the network. 
+ +\subsection{YOLOv1}\label{subsec:yolov1} +The basic functionality of YOLO does not change in the course of the versions. +The input for the network is an image with three dimensions. +In YOLOv1, the size of the image is predetermined due to the \q{Fully Connected Layer} at the end and equals 448 x 448 x 3 pixels. +The output is also a three-dimensional vector that describes the so-called output boxes - meaning in which part of the image with which size a particular object is located. + +\begin{figure}[!htbp] + \vspace{0cm} + \minipage{0.0\textwidth} + \endminipage\hfill + \minipage{0.85\textwidth} + \includegraphics[width=\linewidth]{../data/yolov1.png} + \caption{Architecture of YOLOv1 Taken from the Paper of YOLOv1\cite{redmon2016look}} %Bildunterschrift + \label{fig:arcchitecture_v1} %fig:ID + \endminipage\hfill + \minipage{0.0\textwidth} + \endminipage +\end{figure} + +With YOLOv1, the width and height of the image are constants with 448 x 448 pixels. +That is followed by several convolutional and maximum pooling layers, which double the depth of the image while halving the width and height. + +In a convolutional neuronal network (CNN), each layer increases the understanding complexity of the image. +The first layer detects edges and surfaces, the second one already abstract shapes and the following parts of objects and objects itself. +Several layers are needed to detect the whole complexity of an object.\cite{DBLP:phd/hal/Cherti18} +In YOLO, the width and height are halved at each step to detect larger, connected objects. +The image depth doubles to detect multiple objects. +Alternating between convolutional filters with a filter size of 1x1 and 3x3 with double depth reduces the \q{feature space} of the previous layers and thus capture their full complexity. +YOLOv3 still uses this technique. + +In YOLOv1, the 7x7x1024 tensor resulting from the reduction is converted into a tensor with the dimensions 7x7x30 via two fully connected layers. 
+These tensors are interpreted as output boxes and are used to locate and classify the bounding boxes of objects in the image. + +\subsubsection{Output Boxes in YOLOv1} +YOLOv1 already implements the output as bounding boxes in a tensor. +The concept is modified just a bit in the following versions of YOLO. + +\begin{figure}[!htbp] + \vspace{0cm} + \minipage{0.0\textwidth} + \endminipage\hfill + \minipage{0.85\textwidth} + \includegraphics[width=\linewidth]{../data/yolov1-bounding.png} + \caption{Separating the Image into Multiple Boxes Taken from the Paper of YOLOv1\cite{redmon2016look}} %Bildunterschrift + \label{fig:boxes_v1} %fig:ID + \endminipage\hfill + \minipage{0.0\textwidth} + \endminipage +\end{figure} + +YOLOv1 has as output a tensor of the size 7x7x30. +The image is divided into 49 equally distributed cells. +Each cell has 30 attributes in the reference implementation. +The attributes in the cell indicate whether an object was found in this cell. +A cell is responsible for an object if the center of the object is located in the respective cell. +Thus in the illustration, cell (1, 4) is responsible for recognizing the dog, and cell (2, 3) to recognize the bicycle, whereas one is the first column from the left and four the fourth row from the top. + +In the 30 attributes of a cell, YOLOv1 encodes two bounding boxes (with five attributes each) and a one-hot encoding of 20 classes. +In the five attributes of the bounding box, the position of the center (x, y) is relative to the center of the respective cell, and the width and height (w, h) relative to the total size of the image. +The fifth value specifies the \q{confidence} of how secure the network is that this given Bounding Box is also an object. +The two middle graphs in the figure indicate which class has the highest activation in the respective cell (bottom) and which bounding boxes have been detected (top). 
+The more confident the network is about a bounding box, the thicker the surrounding frame in the figure.
+It can be seen that the dog, bicycle, and car are framed more intensively, and the respective class in the cell of the centers is correctly recognized.
+
+If the confidence for one of the two bounding boxes and a class from the One-Hot-Coding is greater than the threshold value 0.5, then an object with the recognized class exists in the image with the dimensions of the bounding box.
+
+When detecting the traffic light class only, the tensor would have a size of 7x7x11 because just two bounding boxes and one class would be present.
+
+\subsubsection{Problems of YOLOv1}
+YOLOv1 has a fast runtime but rather many problems with the exact localization of objects.
+Besides, the recall (the proportion of actual positive items that are correctly identified as such) is low compared to other neural networks for object detection.\cite{DBLP:conf/cvpr/RedmonF17}
+
+Especially the recall is essential for the recognition of traffic lights because otherwise, many traffic lights are not recognized.
+At the expense of precision (the proportion of identified objects that are actually positive), the recall can be increased.
+However, it would lead to worse results overall because the standard threshold value of 0.5 offers the best cost-benefit ratio (see figure).
+
+The comparatively low localization accuracy results mainly from the inaccuracy of the anchor boxes at the end.
 The network predicts several anchor boxes and one class for a single cell.
 If there are centers of several objects (for instance, a human in front of a car) in a cell, the two different anchor boxes both predict both bounding boxes with high certainty, but due to the network design, only one class can be detected.
+
+\subsection{Improvements in YOLOv2 and YOLOv3}\label{subsec:improvements-in-yolov2-and-yolov3}
+The transition from first-generation YOLOv1 to YOLOv2 has resulted in significant changes. 
+However, the basic concept has been maintained.
+YOLOv2 and YOLOv3 differ only marginally.
+The following concept summarizes both iterations and describes the current and used state of YOLOv3.
+
+\subsubsection{Batch Normalization}
+Starting with YOLOv2, batch normalization is performed after each convolutional layer.
+The use of Batch Normalization accelerates convergence while eliminating the need for other forms of regularization.
+Thus, dropout could be avoided even during training without overfitting the network.
+After adding Batch Normalization behind each Convolutional Layer in the network, followed by LeakyReLU activation, YOLO's mAP is increased by over two percent.
+
+The network reuses the combination of the convolutional layer and the batch normalization several times.
+It occurs in the following text under the name \q{DarknetConv}\myref{par:darknet-convolutional-layer}.
+
+\subsubsection{Darknet}
+
+\begin{figure}[!htbp]
+    \vspace{0cm}
+    \minipage{0.0\textwidth}
+    \endminipage\hfill
+    \minipage{1.0\textwidth}
+    \includegraphics[width=\linewidth]{../data/Darknet.png}
+    \caption{Network Architecture of Darknet-53} %Bildunterschrift
+    \label{fig:darknet} %fig:ID
+    \endminipage\hfill
+    \minipage{0.0\textwidth}
+    \endminipage
+\end{figure}
+
+Probably the most significant change is the introduction of a network for general object recognition (backbone).
+Nowadays, it is common practice in image recognition with neural networks to build on an already existing, generally pre-trained backbone.
+The most common ones are VGG or ResNet, each in different variants.\cite{DBLP:conf/eccv/2018-11}
+YOLOv2 uses its own backbone called \q{Darknet}.
+It is designed to fully utilize the graphics card and can therefore perform more operations per second.
+
+While the Darknet-19 used in YOLOv2 has a lower accuracy (74.1\%), Darknet-53 from YOLOv3 has a similar accuracy (77.2\%) as ResNet-152 (77.6\%). 
+Note that ResNet-152 performs 157\% more FLOPs (Floating Point Operations) than Darknet-53. +Because ResNet has more and less performing layers, Darknet achieves more operations per second, and therefore more frames per second (FPS). + +\def\arraystretch{1.5} + \begin{tabularx}{\textwidth}{|l|r|r|r|r|R{1cm}|} \hline + \textbf{Backbone} & \textbf{Top-1 in \%} & \textbf{Top-5 in \%} & \textbf{FLOP in $10^9$} & \textbf{GFLOP/s} & \textbf{FPS} \\ \hline + \textbf{Darknet-19} & $74.1$ & $91.8$ & $7.29$ & $1246$ & $171$ \\ \hline + \textbf{ResNet-101} & $77.6$ & $93.7$ & $19.7$ & $1039$ & $53$ \\ \hline + \textbf{ResNet-152} & $77.6$ & $93.8$ & $29.7$ & $1090$ & $37$\\ \hline + \textbf{Darknet-53} & $77.2$ & $93.8$ & $18.7$ & $1457$ & $78$ \\ \hline + \caption{Performance Comparison of Darknet and ResNet\cite{DBLP:journals/corr/abs-1804-02767}} + \label{tab:comparison_resnet_darknet} + \end{tabularx} +\def\arraystretch{1} + +\paragraph{Darknet Convolutional Layer}\label{par:darknet-convolutional-layer} +The base of Darknet is the already described \q{DarknetConv}, a convolutional layer with subsequent batch normalization and Leaky ReLU as the activation function. + +\paragraph{Residual Block} +In general, a deep neural network can understand more complex structures and learn more information. +However, the derivation, the gradient, in the deeper layers becomes more and more marginal, and therefore the training of those deeper layers becomes more and more difficult. +The additional capacity decreases with increasing depth. +To counteract this problem, ResNet was created, which works with 152 layers and has achieved a complexity that was unmatched at the time of development. +ResNet introduces so-called \q{Residual Blocks}. 
+This block encloses two convolutional layers and adds the result with a shortcut from the source layer.\cite{DBLP:journals/corr/HeZRS15} + +By skipping some layers during backpropagation and adjusting weights during training, the derivation does not shrink as much and allows the training of more complex structures. +Additionally, a residual block is a measure to prevent overfitting since adding the block improves and enriches the previously existing information only. + +Darknet uses residual blocks in \q{DarknetResidual}, which include two \q{DarknetConv} layers. +The width, height, and depth of the initial layer remain unchanged. + +\paragraph{Darknet Block} +A \q{DarknetBlock} consists of a \q{DarknetConv} layer and several \q{DarknetResidual} blocks. +The \q{DarknetConv} layer halves the width and height of the image while doubling its depth so that the capacity of the information remains intact. +A resulting pixel aggregates four pixels into one and thus encompasses the entire structure. +This step is called pooling. +In many neural networks, max pooling is used, which passes the highest activation to the next layer. +The advantage of pooling by convolution as in YOLOv3 is the increased speed. + +The resulting reduced block is followed by several \q{DarknetResidual} blocks to improve the context of the new size. +The number of blocks varies, and in the process of developing YOLOv3, it got optimized. + +\paragraph{Darknet-53} +Darknet-53 consists of a composition of five Darknet blocks. +Before this, the depth of the image got increased to 32 using a \q{DarknetConv}. + +Each Darknet block halves the width and height and doubles the depth of the image. +That preserves the capacity of the information the layers can hold. +The number of \q{DarknetResidual} blocks used in it varies and represents a local maximum in tests by the developers of YOLOv3. 
+ +\subsubsection{YOLOv3} +The outputs of the last three \q{DarknetResidual} blocks are then passed from the object detection network Darknet-53 to the network to create the anchor boxes. +Continued use of the last three stages instead of just the last output enables the network to recognize smaller objects. + +The final output of the Darknet with the dimensions \q{w/32 x h/32 x 1024}, where w and h are the input sizes, and the depth corresponds to 1024 layers, is processed by a \q{YoloConv} and then processed with a \q{YoloOutput} layer to the dimensions needed for the Anchor Boxes. + +\paragraph{YoloConv} +YOLOv1 already uses a technique to reduce the \q{feature space} of the previous layers. +That means that subsequent layers forget details of the previous layer and therefore have more capacity to learn new, more abstract knowledge. +To achieve this goal, 1x1- and 3x3-Convolutional Layers are applied alternately, and the depth is varied, as shown in the diagram. + +In this part of the neural network, the network learns to extract relevant objects from the general object recognition of Darknet-53 and to set the parameters for the bounding box (\q{anchor box}) in its center. +The center point is usually the location with the highest activation for the object and is therefore easy for the network to find. + +Many objects are located in the center of the image.\cite{DBLP:conf/cvpr/RedmonF17} +Reducing the default resolution from 448x448 (14x14 cells) to 416x416 (13x13 cells) during the transition from YOLOv1 to YOLOv2 ensures the center point is not located on the edge of two cells. +A dedicated cell is now responsible for the image center. +However, this is not relevant for detecting traffic lights since traffic lights are usually located on the side of the road. +When extending the network to other classes, like cars in front, this can be a critical point. + +\paragraph{YoloOutput} +The result of the \q{YoloConv} layer gets transformed into anchor boxes. 
+Only on this last layer it is important how many classes the output should be trained to. + +The output tensor is four-dimensional. +Two dimensions indicate the cell; +the third dimension indicates the respective anchor box. +Since usually three anchor boxes are used per level, the length of this dimension is also three. +The fourth dimension contains the properties of an anchor box in a cell. +These consist of the anchor box' width, height, shifting of the center of the cell, and the confidence of having an object in the anchor box. +In addition to these five properties, there are the One-Hot encoded classes. +Currently, only the traffic light class gets trained, and the length of the fourth dimension is, therefore, six. + +\begin{figure}[!htbp] + \vspace{0cm} + \minipage{0.0\textwidth} + \endminipage\hfill + \minipage{0.50\textwidth} + \includegraphics[width=\linewidth]{../data/bounding-box.png} + \caption{Properties of an anchor box in YOLOv2 (confidence and classes are omitted)\cite{DBLP:conf/cvpr/RedmonF17}} %Bildunterschrift + \label{fig:bounding-box} %fig:ID + \endminipage\hfill + \minipage{0.0\textwidth} + \endminipage +\end{figure} + +In YOLOv2, the offset of an anchor box \q{bx} and \q{by} is relative to the upper left corner of the image. +In contrast, the anchor box’ offset in YOLOv3 refers to the upper left corner of the containing cell. + +Before training, the sizes of the Anchor Boxes are defined as hyperparameters. +In the reference implementation of YOLOv3 are already nine anchor boxes specified, which are used in this work.\cite{DBLP:conf/cvpr/RedmonF17} +For each of the three resolution levels of YOLOv3 detection, it is suggested to use three anchor boxes. +One anchor box is a horizontal rectangle, one is a vertical rectangle, and one box is approximately one square. +The box that most closely matches the aspect ratio of the object to be recognized is responsible for the recognition. 
+ +If an object is now located in a cell, then the \q{Confidence} of the responsible anchor box in the cell exceeds an arbitrary, but previously defined, required threshold, usually 50 percent. +The class with the highest value in the One-Hot encoding is detected. +Additionally, the remaining four properties of the responsible anchor box specify the position and size of the object. + +\paragraph{YoloConvConcatenate} +The first YoloConv layer results in a layer with a 32nd of the width and height of the original image (in the following called A). +The depth is 512. +The information about approximate positions of possible objects created after this layer should be helpful for the recognition in the more detailed layer (in the following called B). +However, this layer has a 16th of the original width and height and is twice as wide and twice as high. + +The output of the coarse YoloConv layer (A) has to be extrapolated first to connect the two layers. +That allows the detection of smaller objects with the support of already detected large structures. +A crowd of people can lead to a low activation of the class \q{Person}. +This information is passed to the layer with more detailed information. +Based on the already approximate positioning, it is now more accessible for the network to provide more precise positions for the class \q{Person}. +The activation on the layer with more detail is higher and overshadows the activation of the first result. 
+
+\begin{figure}[!htbp]
+    \vspace{0cm}
+    \minipage{0.0\textwidth}
+    \endminipage\hfill
+    \minipage{1.0\textwidth}
+    \includegraphics[width=\linewidth]{../data/YOLOv3.png}
+    \caption{Network Architecture of YOLOv3} %Bildunterschrift
+    \label{fig:yolov3} %fig:ID
+    \endminipage\hfill
+    \minipage{0.0\textwidth}
+    \endminipage
+\end{figure}
+
+\subsubsection{Flexible Image Size}
+By using only \q{Convolutional Layers} and no \q{Fully Connected Layer} starting with YOLOv2, the output size of the network is variable depending on the original image size and corresponds to the input size scaled by a factor of 1/32.
+It makes Darknet incredibly flexible.
+It can be applied to any image size, with the runtime increasing linearly with the number of pixels during training, testing, and execution.
+By default, the size of YOLO is 416x416 pixels, which results in a final size of 13x13 pixels.
+The size is smaller than in YOLOv1 but has an advantage in the case of an odd number of pixels.
+The center of the image belongs to one output box and is not on the edge of several.
+In many photographs, the subject is centered.
+However, this condition is not essential for the detection of traffic lights.
+
+Another advantage of not using a fully connected layer is that it is independent of location.
+Although the network learns the context of an object, for example, a traffic light is usually mounted on a mast, it is irrelevant where the object is located in the image.
+In some datasets, the distribution of the positions of the traffic lights diverges between the training and test data.
+YOLO can abstract from this and focus on the shape and color of the object and its surroundings. 
diff --git a/src/04a03-datasets.bib b/src/04a03-datasets.bib new file mode 100644 index 0000000..3a85d1b --- /dev/null +++ b/src/04a03-datasets.bib @@ -0,0 +1,42 @@ +@article{DBLP:journals/corr/CordtsORREBFRS16, + author = {Marius Cordts and + Mohamed Omran and + Sebastian Ramos and + Timo Rehfeld and + Markus Enzweiler and + Rodrigo Benenson and + Uwe Franke and + Stefan Roth and + Bernt Schiele}, + title = {The Cityscapes Dataset for Semantic Urban Scene Understanding}, + journal = {CoRR}, + volume = {abs/1604.01685}, + year = {2016} +} + +@misc{cityscapesYoutube, + title={The Cityscapes Dataset for Semantic Urban Scene Understanding - YouTube}, + url={https://www.youtube.com/watch?v=5_t-U76Ax0U} +} + +@inproceedings{DBLP:conf/icra/FreginMKD18, + author = {Andreas Fregin and + Julian M{\"{u}}ller and + Ulrich Krebel and + Klaus Diermayer}, + title = {The DriveU Traffic Light Dataset: Introduction and Comparison with + Existing Datasets}, + booktitle = {{ICRA}}, + pages = {3376--3383}, + publisher = {{IEEE}}, + year = {2018} +} + +@article{anforderungenLichtsignalanlagen, + title={Technische Anforderungen an Lichtsignalanlagen 2014}, + url={https://www.erfurt.de/mam/ef/leben/arbeit_und_beruf/ausschreibungen/vorschriften/technische_anforderungen_lsa.pdf}, + abstractNote={Forderungen zur Planung und Errichtung von Lichtsignalanlagen, in Trägerschaft der Stadt Erfurt}, + publisher={Stadtverwaltung von Erfurt}, + year={2014}, + month={Jan} +} diff --git a/src/04a03-datasets.tex b/src/04a03-datasets.tex new file mode 100644 index 0000000..9ed0c73 --- /dev/null +++ b/src/04a03-datasets.tex @@ -0,0 +1,59 @@ +\section{Used Datasets}\label{sec:used-datasets} +A central point in the development of neural networks is the use of large and precise data sets. +The creation of datasets for images is very time- and resource-consuming because these datasets have to be labeled individually. 
+Since there is intensive research in autonomous driving, many large and information-rich datasets are available.
+
+\subsection{Cityscapes Dataset}\label{subsec:cityscapes-dataset}
+A collaboration between Daimler AG, the Max Planck Institute, and Darmstadt Technical University created the Cityscapes dataset in 2016.\cite{DBLP:journals/corr/CordtsORREBFRS16}
+Many previously created datasets contain only a few images, a single city, a few classes, or only bounding box annotations.
+The Cityscapes dataset covers over 50 cities in Germany and neighboring countries in three seasons.
+The available images have a size of 1024 x 2048 pixels.
+All images of the dataset, with its 30 classes, are labeled by hand.\cite{cityscapesYoutube}
+
+The used version of the data set contains 41 cities with 22973 images, which are divided accordingly into training and test images.
+The data set is designed for the segmentation of various objects and not specifically for the detection of traffic lights.
+Therefore, the data set contains only 25077 traffic lights in 6734 images.
+
+\subsection{DriveU Traffic Light Dataset (DTLD)}\label{subsec:driveu-traffic-light-dataset-(dtld)}
+Traffic lights can have many different appearances.
+The shape and size often vary.
+A traffic light becomes enlarged when approaching it, and the viewing angle can change due to curves or turning.
+Traffic signals are often partially occluded by trees.
+An essential part is also the currently displayed color of the signal.
+If individual trips are the basis of the datasets, these can have a bias.
+Predominantly green, but also red traffic lights can be recognized preferentially.
+A traffic light in the yellow phase may not be recognized because it appears comparatively less often.
+Balanced data sets are especially important at this point.
+
+If a traffic light is still more than 100 meters away, it is only 1.7px wide, under the condition of a width of 20 centimeters (as specified in the European standard EN12368)\cite{anforderungenLichtsignalanlagen}, a camera viewing angle of 100 degrees on an image with a resolution of two megapixels, and thus hardly recognizable by a neural network.
+To make matters worse, the image is often scaled down for faster training and detection.
+The actual traffic light is now no longer visible on the image - the network detects the light only by its context.
+All the more important is a large and variable data set.
+
+\begin{figure}[!htbp]
+    \vspace{0cm}
+    \minipage{0.0\textwidth}
+    \endminipage\hfill
+    \minipage{0.65\textwidth}
+    \includegraphics[width=\linewidth]{../data/traffic_light_on_plane.png}
+    \caption{Traffic Light Size Within Image}
+    \label{fig:startseite} %fig:ID
+    \endminipage\hfill
+    \minipage{0.0\textwidth}
+    \endminipage
+\end{figure}
+
+If the distance d between the car and the traffic light equals 100m and the camera viewing angle $\Theta$ is 100 degrees, the plane p at that distance has a width of 238 meters.
+The traffic light is 20cm wide, as stated in EN12368.
+That results in a share of $0.08 \%$.
+If the captured image is 2048px wide, the width of the traffic light corresponds to $1.7$ pixels.
+
+A cooperation between the University of Ulm and Daimler AG developed the DriveU Traffic Light Dataset.
+On a total of 43875 pictures, it contains 232039 traffic lights taken in eleven different German cities.
+This dataset is specialized for traffic lights, and the labels also include differentiation of the current status of the indicator.
+That includes the number of existing light bulbs, the direction indicator, and the current light signal.
+This differentiation is not considered at first.
+Traffic lights with more or less than three display elements - the typical green, yellow, red - are excluded.
+ +%TODO Verteilung der Lichter im Bild +%TODO Verteilung der Lichter nach Typ diff --git a/src/05-statutory-declaration.tex b/src/05-statutory-declaration.tex new file mode 100644 index 0000000..ec9fe58 --- /dev/null +++ b/src/05-statutory-declaration.tex @@ -0,0 +1,21 @@ +% Keine Nummerierung für diesen Teil +\section*{Statutory Declarations} +% Keine Kopf- und Fußzeilen ausgeben +% Aber trotzdem ins Inhaltsverzeichnis aufnehmen +%\addcontentsline{toc}{section}{Eidesstattliche Erklärung} + +% Hier der offizielle Text der eidesstattlichen Erklärung +I herewith formally declare that I have written the submitted dissertation independently. +I did not use any outside support except for the quoted literature and other sources mentioned in the paper. + +I clearly marked and separately listed all of the literature and all of the other sources which I employed when producing this academic work, either literally or in content. + +I am aware that the violation of this regulation will lead to failure of the thesis. +% Etwas Abstand für die Unterschrift +\vspace{2cm} + +% Hier kommt die Unterschrift drüber +\begin{tabular}{lp{4em}l} + \hspace{5cm} && \hspace{4cm} \\\cline{1-1}\cline{3-3} + Ort, Datum && \studentName +\end{tabular} diff --git a/src/main.tex b/src/main.tex new file mode 100644 index 0000000..99b585b --- /dev/null +++ b/src/main.tex @@ -0,0 +1,272 @@ +% Zur Verfügung gestellt von Johannes Woske, IT2010, j.woske@gmail.com +\documentclass[ + 12pt, %Schriftgröße + a4paper, + liststotoc, %Inhaltsverzeichniseinträge für Listen (z.B. 
Abbildungen)
+ bibtotoc, %Inhaltsverzeichniseinträge für Quellen
+ pointlessnumbers, %Entfernt Punkt hinter Gliederungsnummern
+ USenglish, %Sprachpaket
+ headsepline, %Headertrennlinie
+% footsepline, %Footertrennlinie
+ twoside %zweiseitiges Druckformat
+ ]{scrbook} %Dokumentenklasse (Koma-Script)
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage[USenglish]{babel}
+\usepackage{url}
+\usepackage{graphicx} %Bilder einfügen
+%\usepackage{pdfpages} %PDF einfügen
+\usepackage[a4paper, margin=1in]{geometry}
+\usepackage[right]{eurosym} %Euro-Zeichen
+\usepackage{amssymb}
+\usepackage{cite} %Quellenangaben
+\usepackage[
+ colorlinks, % Links ohne Umrandungen in zu wählender Farbe
+ linkcolor=black, % Farbe interner Verweise
+ filecolor=black, % Farbe externer Verweise
+ citecolor=black, % Farbe von Zitaten
+ urlcolor=black % Farbe von Links
+]{hyperref} %Verlinkungen
+\usepackage[USenglish]{translator}
+\usepackage{blindtext} % Lorem-Ipsum-Plugin
+%Glossar-Paket laden
+\usepackage[
+ nonumberlist, %keine Seitenzahlen anzeigen
+ acronym, %ein Abkürzungsverzeichnis erstellen
+ toc, %Einträge im Inhaltsverzeichnis
+ %section %im Inhaltsverzeichnis auf section-Ebene erscheinen
+ ]
+{glossaries}
+\usepackage[printonlyused]{acronym}
+\usepackage{csquotes}
+\usepackage{booktabs}
+%\usepackage{struktex}
+\usepackage{verbatim}
+%\usepackage{tabularx}
+\usepackage{ltablex}\keepXColumns
+\usepackage{makecell}
+\usepackage{enumitem}
+\newcommand{\bc}[1]{{\centering\bf #1}}
+\newcommand{\myref}[1]{{(see \autoref{#1}~\nameref{#1})}}
+\newcommand{\q}[1]{{\enquote{#1}}}
+
+
+\newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
+\newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
+\newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
+
+
+\makeatletter
+\renewcommand{\@chapapp}{}% Not necessary...
+\newenvironment{chapquote}[2][2em] + {\setlength{\@tempdima}{#1}% + \def\chapquote@author{#2}% + \parshape 1 \@tempdima \dimexpr\textwidth-2\@tempdima\relax% + \itshape} + {\par\normalfont\hfill--\ \chapquote@author\hspace*{\@tempdima}\par\bigskip} +\makeatother + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%% Studentenname +\newcommand{\studentName}{Seedorf, Sebastian} +%%%%%%%%%%%% Typ der Arbeit +\newcommand{\type}{Masterarbeit} +%%%%%%%%%%%% Thema +\newcommand{\topic}{Detection of Traffic Lights in Urban Traffic Using a Convolutional Neural Network (CNN)} +%%%%%%%%%%%% Matrikelnummer +\newcommand{\matrikel}{5204360} +%%%%%%%%%%%% Erstbetreuer +\newcommand{\erstBetreuer}{Prof. Dr. Daniel Goehring} +%%%%%%%%%%%% Zweitbetreuer +\newcommand{\zweitBetreuer}{Prof. Dr. Dr. (h.c.) habil. Raúl Rojas} +%%%%%%%%%%%% Jahrgang +\newcommand{\jahrgang}{2017} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\usepackage{listings,xcolor} %Codeanzeige +\definecolor{dkgreen}{rgb}{0,.6,0} +\definecolor{dkblue}{rgb}{0,0,.6} +\definecolor{dkyellow}{cmyk}{0,0,.8,.3} + +\lstset{ + numbers=left, + numberstyle=\tiny, + numbersep=5pt, + breaklines=true, + frame=single, + escapeinside={(*@}{@*)}, %nicht anzuzeigende Ausdrücke, z.B. 
für Labels + language=sh, + basicstyle=\ttfamily\fontsize{10}{12}\selectfont, + keywordstyle = \color{dkblue}, + stringstyle = \color{red}, + identifierstyle = \color{dkgreen}, + commentstyle = \color{gray}, + emph =[1]{php}, + emphstyle =[1]\color{black}, + emph =[2]{if,and,or,else}, + emphstyle =[2]\color{dkyellow} + } + +\colorlet{punct}{red!60!black} +\definecolor{background}{HTML}{EEEEEE} +\definecolor{delim}{RGB}{20,105,176} +\colorlet{numb}{magenta!60!black} +\lstdefinelanguage{json}{ + basicstyle=\normalfont\ttfamily, + numbers=left, + numberstyle=\scriptsize, + stepnumber=1, + numbersep=8pt, + showstringspaces=false, + breaklines=true, + frame=lines, + backgroundcolor=\color{background}, + literate= + *{0}{{{\color{numb}0}}}{1} + {1}{{{\color{numb}1}}}{1} + {2}{{{\color{numb}2}}}{1} + {3}{{{\color{numb}3}}}{1} + {4}{{{\color{numb}4}}}{1} + {5}{{{\color{numb}5}}}{1} + {6}{{{\color{numb}6}}}{1} + {7}{{{\color{numb}7}}}{1} + {8}{{{\color{numb}8}}}{1} + {9}{{{\color{numb}9}}}{1} + {:}{{{\color{punct}{:}}}}{1} + {,}{{{\color{punct}{,}}}}{1} + {\{}{{{\color{delim}{\{}}}}{1} + {\}}{{{\color{delim}{\}}}}}{1} + {[}{{{\color{delim}{[}}}}{1} + {]}{{{\color{delim}{]}}}}{1}, +} + +\makeatletter +\AtBeginDocument{% + \let\c@lstlisting\c@table + \let\c@figure\c@lstlisting + \let\thefigure\thelstlisting + \let\ftype@lstlisting\ftype@figure % give the floats the same precedence + + + \let\ftype@lstlisting\ftype@figure + \renewcommand*{\ext@figure}{lot} + %\let\c@figure\c@table + \let\ftype@figure\ftype@table + \let\listoftableandfigures\listoftables + \renewcommand*\listtablename{Tables and Figures} +} +\makeatother + + +\makeatletter \def\cleardoublepage{\clearpage\if@twoside +\ifodd\c@page\else \hbox{} \vspace*{\fill} +\thispagestyle{empty} \newpage +\if@twocolumn\hbox{}\newpage\fi\fi\fi} \makeatother + + +% Seitenabstände definieren +\geometry{verbose,tmargin=3.5cm,bmargin=3.5cm,lmargin=3.5cm,rmargin=2.5cm} +\linespread{1.2} + +% Witwe und Waise +\clubpenalty = 10000 
\widowpenalty = 10000 \displaywidowpenalty = 10000 + +\newcommand{\footfigref}[1]{\footnote{Abb. \ref{#1} auf Seite \pageref{#1}}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% BEISPIEL BILD +%\begin{figure}[h] +% \includegraphics[scale=0.8]{AlphaBeta_02.png} +% \label{fig:alphabeta} %fig:ID +% \caption{Parametertests} %Bildunterschrift +%\end{figure}\\ +%%%%%%%%%% BILDVERWEIS IM TEXT MIT FUSSNOTE +%\footfigref{fig:alphabeta} %fig:ID +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%falsche Default-Silbentrennung überschreiben +%\include{hyphenation} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Ein eigenes Symbolverzeichnis erstellen +\newglossary[slg]{symbolslist}{syi}{syg}{List of Symbols} + +%Den Punkt am Ende jeder Beschreibung deaktivieren +\renewcommand*{\glspostdescription}{} + +%Glossar-Befehle anschalten +\makeglossaries + +%Diese Befehle sortieren die Einträge in den +%einzelnen Listen: +%makeindex -s datei.ist -t datei.alg -o datei.acr datei.acn +%makeindex -s datei.ist -t datei.glg -o datei.gls datei.glo +%makeindex -s datei.ist -t datei.slg -o datei.syi datei.syg + +%\include{glossar} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%% Dokumenteninhalt +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +%Titelblatt +\include{01-title} + +\pagenumbering{Roman}%römische Seitenzahlen + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%% Abstract + +\include{02-abstract} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%% Inhaltsverzeichnis +\cleardoublepage +\addcontentsline{toc}{chapter}{Table of Contents} + +\setcounter{tocdepth}{3} +\tableofcontents + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%% Abbildungsverzecihnis +%\listoffigures +%\addcontentsline{toc}{chapter}{Abbildungsverzeichnis} +%\listoftables +%\lstlistoflistings +\listoftableandfigures + + 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%% Glossar ausgeben +\include{03-acronym} + +\cleardoublepage + +\pagenumbering{arabic} %arabische Seitenzahlen + +\cleardoublepage + +\include{04-content} + +\newpage + +%%%%%%%%%%%%%%%%%%%%%%%%%% +% Quellen +%%%%%%%%%%%%%%%%%%%%%%%%% +\pagenumbering{Roman}%römische Seitenzahlen +\setcounter{page}{6} +\bibliographystyle{ieeetr} +\bibliography{02-abstract,04a01-requirement-analysis,04a02-yolo,04a03-datasets,04-a21-implementation-init} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Eidesstattliche Erklärung +%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\newpage +\thispagestyle{empty} + +\include{05-statutory-declaration} + +\end{document} diff --git a/src/templates.tex b/src/templates.tex new file mode 100644 index 0000000..7d3e6d8 --- /dev/null +++ b/src/templates.tex @@ -0,0 +1,42 @@ +% Table +\def\arraystretch{1.5} +\begin{tabularx}{\textwidth}{|L{2cm}|X|} \hline +\textbf{ID} & \textbf{Anforderung} \\ \hline +F-AUT-1 & Der Zugriff auf das System darf nur durch einen Login erfolgen. \linebreak \linebreak Für Anforderungen zur Authentifizierung siehe auch NF-SEC-1 bis NF-SEC-4. \\ \hline +F-AUT-2 & Falls der Benutzer auf durch Login beschränkte Daten zugreifen möchte und nicht eingeloggt ist, muss der Benutzer zum Login aufgefordert und weitergeleitet werden. \\ \hline +F-AUT-3 & Falls der Benutzer auf die Login-Seite zugreifen möchte und bereits eingeloggt ist, muss der Benutzer auf eine Seite weitergeleitet werden, die nach dem Login zur Verfügung steht. 
\\ \hline +\end{tabularx} +\def\arraystretch{1} + +% Image +\begin{figure}[!htbp] + \vspace{0cm} + \minipage{0.0\textwidth} + \endminipage\hfill + \minipage{0.85\textwidth} + \includegraphics[width=\linewidth]{../data/Darknet.png} + \caption{Startseite des Systems \q{BIO.stada}} %Bildunterschrift + \label{fig:startseite} %fig:ID + \endminipage\hfill + \minipage{0.0\textwidth} + \endminipage +\end{figure} + +% Ref +\myref{sec:Versionsverwaltung} + +% Code +\begin{lstlisting}[language=json,caption={Antwort des Backend-Servers bei fehlendem Login}] +{ + "meta": { + "code": 103001, + "text": "You need to be logged in to fulfil this request!" + }, + "body": null +} +\end{lstlisting} + +% Quote +\begin{chapquote}{Thomas Zink, Datenmanager; \textit{3.8., Gedächtnisprotokoll von Sebastian Seedorf}} + \q{Momentan bietet das System noch nicht allzu viele Vorzüge im Vergleich zur Access-Datenbank. Zumal nicht sonderlich viele neue Daten regelmäßig dazu kommen, ist ein Austauschen der neuen Datenbank kein sonderliches Problem. Wenn das System dann bald soweit ist [Verweis auf seine Tickets im Issue-Tracker, A. der Red.], dass man auch DDS [\acl{DDS}, A. der Red.] automatisch als XLSX erzeugen kann, dann erleichtert es Unmengen an Arbeit. Es muss nicht zwingend XLSX sein, PDF wäre mir sogar lieber, aber das ist ja dann eine Frage der Implementierung.} +\end{chapquote}