% Journal article: Seiter, Amft, Rossi and Troester, Pervasive and Mobile Computing 15 (2014), pp. 215-227.
% The doi field stores the bare DOI (no resolver prefix); the abstract was cleaned of export artifacts.
@article{seiter2014discovery,
  author   = {Seiter, Julia and Amft, Oliver and Rossi, Mirco and Tr{\"o}ster, Gerhard},
  title    = {Discovery of activity composites using topic models: An analysis of unsupervised methods},
  journal  = {Pervasive and Mobile Computing},
  volume   = {15},
  pages    = {215--227},
  year     = {2014},
  note     = {Special Issue on Data Mining in Pervasive Environments},
  issn     = {1574-1192},
  doi      = {10.1016/j.pmcj.2014.05.007},
  url      = {http://www.sciencedirect.com/science/article/pii/S1574119214000832},
  keywords = {Activity routines},
  abstract = {In this work we investigate unsupervised activity discovery approaches using three topic model~(TM) approaches, based on Latent Dirichlet Allocation~(LDA), n-gram TM~(NTM), and correlated TM~(CTM). While LDA structures activity primitives, NTM adds primitive sequence information, and CTM exploits co-occurring topics. We use an activity composite/primitive abstraction and analyze three public datasets with different properties that affect the discovery, including primitive rate, activity composite specificity, primitive sequence similarity, and composite-instance ratio. We compare the activity composite discovery performance among the TM approaches and against a baseline using k-means clustering. We provide guidelines for method and optimal TM parameter selection, depending on data properties and activity primitive noise. Results indicate that TMs can outperform k-means clustering up to 17\%, when composite specificity is low. LDA-based TMs showed higher robustness against noise compared to other TMs and k-means.},
}