% Journal article (Journal of Computer Science, vol. 18, no. 11, 2022).
% The citation key is the article's DOI; keep it stable, since documents cite it.
% article_type is a non-standard field carried over from the publisher export;
% standard styles ignore it.
@article{10.3844/jcssp.2022.1021.1029,
  author       = {Cheikh Tourad, El Hadrami and Eleuldj, Mohsine},
  title        = {Quantization and Pipelined Hardware Implementation of Deep Neural Network Models},
  journal      = {Journal of Computer Science},
  publisher    = {Science Publications},
  volume       = {18},
  number       = {11},
  pages        = {1021--1029},
  year         = {2022},
  month        = oct,
  doi          = {10.3844/jcssp.2022.1021.1029},
  url          = {https://thescipub.com/abstract/jcssp.2022.1021.1029},
  article_type = {journal},
  abstract     = {In recent years, Deep Neural Networks (DNNs) have garnered much interest due to advances in computational power and data availability. Indeed, DNNs presents a considerable advantage in several challenges, such as classification problems and video analysis. Although, such accomplishment leads to significantly increasing energy demands, computational expenses, and memory capacity. In addition, current efficient DNNs may have more complex and extensive structures. As a result, implementing these huge models on embedded systems with limited sources is challenging. However, several works have attempted to solve the implementation issues while maintaining optimum accuracy. Among these ideas is compressing the model size using the quantization method and deploying it on Field Programmable Gate Arrays (FPGA) to enhance the latency and minimize the energy cost. This article presents a model optimizer using quantization methods to ensure the model hardware implementation. This optimizer compresses the model size and is integrated with a design flow that implements the model on the hardware. Furthermore, this article presents "DNN2FPGA," a design flow that can automatically implement the Deep Learning models on FPGA by producing pipelined HDL codes. This article indicates an excellent performance by decreasing the model's size and latency by 4x while maintaining the model's accuracy. It also presents a full review of the state of the art.},
}