% PhD thesis entry. The acronym in the title is brace-protected so that
% sentence-casing bibliography styles do not lowercase it; `owner` and
% `timestamp` are reference-manager bookkeeping fields that standard styles ignore.
@phdthesis{Gottron:2008e,
  author    = {Gottron, Thomas},
  title     = {Content Extraction: Identifying the Main Content in {HTML} Documents},
  school    = {Johannes Gutenberg-University, Mainz},
  year      = {2008},
  owner     = {gotti},
  timestamp = {2009.04.24},
}