Shakerato

YouTube-8M dataset file renaming for avoiding duplicate file names in Windows (Python code) 본문

Research

YouTube-8M dataset file renaming for avoiding duplicate file names in Windows (Python code)

Shakeratto 2018. 6. 6. 21:36
## To avoid the duplicate-file-name problem on Windows, where names differing only in case collide (e.g. Ab.txt and aB.txt),
## I first downloaded the dataset and renamed the files on Linux,
## then copied the renamed files to my Windows desktop to train a deep neural network.
## The renamed dataset also works well with the YouTube-8M training code.

import os
import shutil

# Source directory holding the downloaded shards, and the directory that
# receives the renamed copies.
oldpath = '/data/yt8m/v2/video/'
newpath = '/data/yt8m/v2/video2/'

# First three characters of a shard's file name -> split label used as the
# prefix of the new file name.
_SPLIT_BY_PREFIX = {'tra': 'train', 'tes': 'test', 'val': 'validate'}


def _split_of(filename):
    """Return the split label for *filename*, or None if it must be skipped.

    A file is accepted when its name contains '.tfrecord' and its first
    three characters are 'tra', 'tes' or 'val'.
    """
    if '.tfrecord' not in filename:
        return None
    return _SPLIT_BY_PREFIX.get(filename[:3])


def copy_renamed_files(src_dir, dst_dir):
    """Copy every recognised shard from *src_dir* to *dst_dir* under a new name.

    Each copy is named ``<split><5-digit counter>_<stem>.tfrecord``, where
    ``<stem>`` is the original name up to its first '.'. The per-split
    counter makes the new names differ by more than letter case.

    Args:
        src_dir: Directory containing the original files.
        dst_dir: Directory that receives the copies; created if missing.

    Returns:
        A dict mapping 'train', 'test' and 'validate' to the number of
        files copied for that split.

    Raises:
        OSError: If *src_dir* cannot be listed or a file cannot be copied.
    """
    counts = {'train': 0, 'test': 0, 'validate': 0}
    os.makedirs(dst_dir, exist_ok=True)

    # os.listdir gives no ordering guarantee; sorting keeps the assigned
    # counters reproducible from one run to the next.
    for filename in sorted(os.listdir(src_dir)):
        split = _split_of(filename)
        if split is None:
            continue

        new_file_name = (split + str(counts[split]).zfill(5) + '_' +
                         filename.split('.')[0] + '.tfrecord')
        counts[split] += 1

        old_file_path = os.path.join(src_dir, filename)
        new_file_path = os.path.join(dst_dir, new_file_name)

        # copyfile streams the data in chunks instead of reading the whole
        # shard into memory at once.
        shutil.copyfile(old_file_path, new_file_path)

        print('Successfuly Copied! [', filename, ' to ', new_file_path, ']')

    return counts


if __name__ == '__main__':
    copied = copy_renamed_files(oldpath, newpath)
    print(copied['train'])
    print(copied['test'])
    print(copied['validate'])


Comments