2020
Chao, Fang-Yi; Ozcinar, Cagri; Zhang, Lu; Hamidouche, Wassim; Deforges, Olivier; Smolic, Aljosa Towards Audio-Visual Saliency Prediction for Omnidirectional Video with Spatial Audio Conference In: 2020 IEEE International Conference on Visual Communications and Image Processing (VCIP), IEEE, China, 2020. @conference{Smolic2020,
title = {Towards Audio-Visual Saliency Prediction for Omnidirectional Video with Spatial Audio},
author = {Fang-Yi Chao and Cagri Ozcinar and Lu Zhang and Wassim Hamidouche and Olivier Deforges and Aljosa Smolic},
url = {https://ieeexplore.ieee.org/abstract/document/9301766},
doi = {10.1109/VCIP49819.2020.9301766},
year = {2020},
date = {2020-12-01},
publisher = {IEEE},
address = {China},
organization = {2020 IEEE International Conference on Visual Communications and Image Processing (VCIP)},
abstract = {Omnidirectional videos (ODVs) with spatial audio enable viewers to perceive 360° directions of audio and visual signals during the consumption of ODVs with head-mounted displays (HMDs). By predicting salient audio-visual regions, ODV systems can be optimized to provide an immersive sensation of audio-visual stimuli at high quality. Despite the intense recent effort on ODV saliency prediction, the current literature still does not consider the impact of auditory information in ODVs. In this work, we propose an audio-visual saliency (AVS360) model that incorporates a 360° spatial-temporal visual representation and spatial auditory information in ODVs. The proposed AVS360 model is composed of two 3D residual networks (ResNets) that encode the visual and audio cues. The first is embedded with a spherical representation technique to extract 360° visual features, and the second extracts audio features from the log mel-spectrogram. We emphasize sound source locations by integrating an audio energy map (AEM) generated from the spatial audio description (i.e., ambisonics), and we model equator viewing behavior with an equator center bias (ECB). The audio and visual features are combined and fused with the AEM and ECB via an attention mechanism. Our experimental results show that the AVS360 model significantly outperforms five state-of-the-art saliency models. To the best of our knowledge, this is the first work to develop an audio-visual saliency model for ODVs. The code will be publicly available to foster future research on audio-visual saliency in ODVs.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
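The AVS360 abstract above describes deriving an audio energy map (AEM) from the spatial audio description (ambisonics) to emphasize sound source locations. As a rough, hedged illustration of that idea only, the Python sketch below beamforms first-order ambisonics (B-format) energy onto an equirectangular grid; the channel convention, windowing, and normalisation are assumptions made here, not the authors' published formulation, and audio_energy_map is a hypothetical name.

import numpy as np

def audio_energy_map(W, X, Y, Z, height=32, width=64):
    """Beamformed energy of one B-format audio window on an equirectangular grid.

    W, X, Y, Z: 1-D numpy arrays holding the four first-order ambisonics
    channels for the time window aligned with one video frame.
    Returns a (height, width) map normalised to [0, 1].
    """
    # Pixel centres of the equirectangular layout in spherical coordinates.
    phi = np.linspace(np.pi / 2, -np.pi / 2, height)           # elevation
    theta = np.linspace(-np.pi, np.pi, width, endpoint=False)  # azimuth
    th, ph = np.meshgrid(theta, phi)                           # (height, width)

    # Unit look direction for every pixel.
    dir_x = np.cos(ph) * np.cos(th)
    dir_y = np.cos(ph) * np.sin(th)
    dir_z = np.sin(ph)

    # Steer a virtual microphone at each pixel and average its power over
    # the window: s = W + x*X + y*Y + z*Z, giving shape (height, width, T).
    steered = (W[None, None, :]
               + dir_x[..., None] * X[None, None, :]
               + dir_y[..., None] * Y[None, None, :]
               + dir_z[..., None] * Z[None, None, :])
    energy = (steered ** 2).mean(axis=-1)

    return energy / (energy.max() + 1e-8)

In the model described above, a map like this would then be resized to the saliency resolution and fused with the visual features and the equator center bias through attention; those stages are not sketched here.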
Rossi, Silvia; Ozcinar, Cagri; Smolic, Aljosa; Toni, Laura Do Users Behave Similarly in VR? Investigation of the User Influence on the System Design Journal Article In: ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), 2020. @article{Rossi2020,
title = {Do Users Behave Similarly in VR? Investigation of the User Influence on the System Design},
author = {Silvia Rossi and Cagri Ozcinar and Aljosa Smolic and Laura Toni},
url = {https://v-sense.scss.tcd.ie:443/research/3dof/vr_user_behaviour_system_design/},
year = {2020},
date = {2020-02-03},
urldate = {2020-02-03},
journal = {ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2019
Croci, Simone; Ozcinar, Cagri; Zerman, Emin; Cabrera, Julian; Smolic, Aljosa Voronoi-based Objective Quality Metrics for Omnidirectional Video Inproceedings In: 11th International Conference on Quality of Multimedia Experience (QoMEX 2019), 2019. @inproceedings{Croci2019,
title = {Voronoi-based Objective Quality Metrics for Omnidirectional Video},
author = {Simone Croci and Cagri Ozcinar and Emin Zerman and Julian Cabrera and Aljosa Smolic},
url = {https://v-sense.scss.tcd.ie:443/research/voronoi-based-objective-metrics/
https://v-sense.scss.tcd.ie:443/wp-content/uploads/2019/03/QoMEX2019.pdf},
year = {2019},
date = {2019-06-06},
booktitle = {11th International Conference on Quality of Multimedia Experience (QoMEX 2019)},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ozcinar, Cagri; Cabrera, Julian; Smolic, Aljosa Visual Attention-Aware Omnidirectional Video Streaming Using Optimal Tiles for Virtual Reality Journal Article In: IEEE Journal on Emerging and Selected Topics in Circuits and Systems, 2019. @article{Ozcinar2019,
title = {Visual Attention-Aware Omnidirectional Video Streaming Using Optimal Tiles for Virtual Reality},
author = {Cagri Ozcinar and Julian Cabrera and Aljosa Smolic},
url = {https://v-sense.scss.tcd.ie:443/research/va-aware-odv-streaming/
https://v-sense.scss.tcd.ie:443/wp-content/uploads/2019/03/JETCAS_SI_immersive_2018_pc.pdf},
year = {2019},
date = {2019-05-15},
journal = {IEEE Journal on Emerging and Selected Topics in Circuits and Systems},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2018
Ozcinar, Cagri; Cabrera, Julian; Smolic, Aljosa Omnidirectional Video Streaming Using Visual Attention-Driven Dynamic Tiling for VR Inproceedings In: IEEE International Conference on Visual Communications and Image Processing (VCIP) 2018, Taichung, Taiwan, 2018. @inproceedings{Ozcinar2018b,
title = {Omnidirectional Video Streaming Using Visual Attention-Driven Dynamic Tiling for VR},
author = {Cagri Ozcinar and Julian Cabrera and Aljosa Smolic},
url = {https://v-sense.scss.tcd.ie:443/wp-content/uploads/2019/03/VCIP_2018.pdf
https://v-sense.scss.tcd.ie:443/research/3dof/360-degree-video-coding-and-streaming-for-virtual-reality/},
year = {2018},
date = {2018-12-09},
booktitle = {IEEE International Conference on Visual Communications and Image Processing (VCIP) 2018},
address = {Taichung, Taiwan},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ozcinar, Cagri; Cabrera, Julian; Smolic, Aljosa Viewport-aware omnidirectional video streaming using visual attention and dynamic tiles Inproceedings In: 7th European Workshop on Visual Information Processing (EUVIP) 2018, 2018. @inproceedings{Ozcinar2018c,
title = {Viewport-aware omnidirectional video streaming using visual attention and dynamic tiles},
author = {Cagri Ozcinar and Julian Cabrera and Aljosa Smolic},
url = {https://v-sense.scss.tcd.ie:443/wp-content/uploads/2019/03/EUVIP_2018_co.pdf
https://v-sense.scss.tcd.ie:443/?p=519},
year = {2018},
date = {2018-11-27},
booktitle = {7th European Workshop on Visual Information Processing (EUVIP) 2018},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2017
Ozcinar, Cagri; Abreu, Ana De; Knorr, Sebastian; Smolic, Aljosa Estimation of optimal encoding ladders for tiled 360° VR video in adaptive streaming systems Inproceedings In: The 19th IEEE International Symposium on Multimedia (ISM 2017), Taichung, Taiwan, 2017. @inproceedings{OzcinarISM2017,
title = {Estimation of optimal encoding ladders for tiled 360° VR video in adaptive streaming systems},
author = {Cagri Ozcinar and Ana De Abreu and Sebastian Knorr and Aljosa Smolic},
url = {https://v-sense.scss.tcd.ie:443/wp-content/uploads/2018/02/ISM_2017_pcopy.pdf
http://ieeexplore.ieee.org/document/8241580/
https://arxiv.org/pdf/1711.03362.pdf
https://www.researchgate.net/publication/320274287_Estimation_of_Optimal_Encoding_Ladders_for_Tiled_360_VR_Video_in_Adaptive_Streaming_Systems?tab=overview},
year = {2017},
date = {2017-12-11},
booktitle = {The 19th IEEE International Symposium on Multimedia (ISM 2017)},
address = {Taichung, Taiwan},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ozcinar, Cagri; Abreu, Ana De; Smolic, Aljosa Viewport-aware adaptive 360° video streaming using tiles for virtual reality Inproceedings In: 2017 IEEE International Conference on Image Processing (ICIP), pp. 2174-2178, Beijing, China, 2017, ISSN: 2381-8549. @inproceedings{Ozcinar2017,
title = {Viewport-aware adaptive 360° video streaming using tiles for virtual reality},
author = {Cagri Ozcinar and Ana De Abreu and Aljosa Smolic},
url = {https://www.researchgate.net/publication/316990176_VIEWPORT-AWARE_ADAPTIVE_360_VIDEO_STREAMING_USING_TILES_FOR_VIRTUAL_REALITY
http://ieeexplore.ieee.org/document/8296667/
https://arxiv.org/pdf/1711.02386.pdf},
doi = {10.1109/ICIP.2017.8296667},
issn = {2381-8549},
year = {2017},
date = {2017-09-30},
booktitle = {2017 IEEE International Conference on Image Processing (ICIP)},
pages = {2174-2178},
address = {Beijing, China},
abstract = {360° video is attracting an increasing amount of attention in the context of Virtual Reality (VR). Owing to its very high-resolution requirements, existing professional streaming services for 360° video suffer from severe drawbacks. This paper introduces a novel end-to-end streaming system, from encoding to display, to transmit 8K resolution 360° video and to provide an enhanced VR experience using head-mounted displays (HMDs). The main contributions of the proposed system concern tiling, integration of the MPEG Dynamic Adaptive Streaming over HTTP (DASH) standard, and viewport-aware bitrate level selection. Tiling and adaptive streaming enable the proposed system to deliver very high-resolution 360° video at good visual quality. Further, the proposed viewport-aware bitrate assignment selects an optimum DASH representation for each tile in a viewport-aware manner. The quality performance of the proposed system is verified in simulations with varying network bandwidth, using realistic view trajectories recorded from user experiments. Our results show that the proposed streaming system compares favorably to existing methods in terms of PSNR and SSIM inside the viewport.
Our streaming system is available as an open source library.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
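The ICIP 2017 abstract above turns on a viewport-aware bitrate level selection across DASH tile representations under a bandwidth budget. The Python sketch below is only a hedged illustration of that general idea, not the paper's actual optimisation (which is described in the PDF linked above): it greedily upgrades whichever tile offers the best viewport-overlap gain per extra bit until the budget is exhausted. The function name, inputs, and greedy criterion are all assumptions.

def select_representations(tiles, overlap, budget_kbps):
    """Pick one representation index per tile under a bandwidth budget.

    tiles: dict tile_id -> ascending list of representation bitrates (kbit/s).
    overlap: dict tile_id -> fraction of the tile inside the predicted viewport.
    """
    # Start every tile at its lowest representation.
    choice = {t: 0 for t in tiles}
    spent = sum(rates[0] for rates in tiles.values())
    if spent > budget_kbps:
        raise ValueError("budget below the minimum representation set")

    # Repeatedly upgrade the tile with the best overlap-per-extra-bit ratio.
    while True:
        best, best_gain = None, 0.0
        for t, rates in tiles.items():
            level = choice[t]
            if level + 1 >= len(rates):
                continue  # already at the highest representation
            extra = rates[level + 1] - rates[level]
            if spent + extra > budget_kbps:
                continue  # this upgrade would break the budget
            gain = overlap.get(t, 0.0) / extra
            if gain > best_gain:
                best, best_gain = t, gain
        if best is None:
            return choice
        spent += tiles[best][choice[best] + 1] - tiles[best][choice[best]]
        choice[best] += 1

For example, select_representations({0: [200, 800, 2000], 1: [200, 800, 2000]}, {0: 1.0, 1: 0.1}, budget_kbps=2400) upgrades tile 0 all the way to 2000 kbit/s while tile 1 stays at its base rate, because tile 0 fills the viewport.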