@article{,
title= {Elastic Malware Benchmark for Empowering Researchers 2017 Part 1},
keywords= {},
author= {},
abstract= {The EMBER dataset is a collection of features from PE files that serve as a benchmark dataset for researchers. The EMBER2017 dataset contained features from 1.1 million PE files scanned in or before 2017 and the EMBER2018 dataset contains features from 1 million PE files scanned in or before 2018. This repository makes it easy to reproducibly train the benchmark models, extend the provided feature set, or classify new PE files with the benchmark models.
This paper describes many more details about the dataset: https://arxiv.org/abs/1804.04637
# Cite
H. Anderson and P. Roth, "EMBER: An Open Dataset for Training Static PE Malware Machine Learning Models”, in ArXiv e-prints. Apr. 2018.
```
@ARTICLE{2018arXiv180404637A,
author = {{Anderson}, H.~S. and {Roth}, P.},
title = "{EMBER: An Open Dataset for Training Static PE Malware Machine Learning Models}",
journal = {ArXiv e-prints},
archivePrefix = "arXiv",
eprint = {1804.04637},
primaryClass = "cs.CR",
keywords = {Computer Science - Cryptography and Security},
year = 2018,
month = apr,
adsurl = {http://adsabs.harvard.edu/abs/2018arXiv180404637A},
}
```
https://i.imgur.com/eor0Szg.png
},
terms= {},
license= {https://opensource.org/licenses/MIT},
superseded= {},
url= {https://github.com/elastic/ember}
}