Source code for dabu.analyse_data_structure.analyse_data_structure
"""
:Author: Daniel Mohr
:Email: daniel.mohr@dlr.de
:Date: 2021-02-17, 2021-07-29 (last change).
:License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
"""
import os
import re


def check_file_available(files, key):
    """
    :Author: Daniel Mohr
    :Email: daniel.mohr@dlr.de
    :Date: 2021-01-19 (last change).

    Return the first entry of files whose name starts with key
    (case-insensitive), or None if no such file exists.
    """
    res = None
    key = key.lower()
    for filename in files:
        if filename.lower().startswith(key):
            res = filename
            break
    return res
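
# Illustrative example (matching is a case-insensitive prefix check on the
# file name):
#   check_file_available(['README.md', 'data.csv'], 'readme')  # 'README.md'
#   check_file_available(['data.csv'], 'license')              # None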


def add_append_integrate_data(store, key, data):
    """
    :Author: Daniel Mohr
    :Email: daniel.mohr@dlr.de
    :Date: 2021-02-04 (last change).

    Store data under key in the dict store. If key already holds a
    different value, collect all distinct values in a list.
    """
    if (key in store) and (store[key] != data):
        if isinstance(store[key], (list, tuple)):
            if data not in store[key]:
                store[key].append(data)
        else:
            store[key] = [store[key], data]
    else:
        store[key] = data
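
# Illustrative example of how repeated calls accumulate distinct values:
#   store = {}
#   add_append_integrate_data(store, 'k', 'a')  # store == {'k': 'a'}
#   add_append_integrate_data(store, 'k', 'a')  # unchanged, same value
#   add_append_integrate_data(store, 'k', 'b')  # store == {'k': ['a', 'b']}
#   add_append_integrate_data(store, 'k', 'c')  # -> {'k': ['a', 'b', 'c']}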


def analyse_data_structure(path_name='.', result=None):
    """
    :Author: Daniel Mohr
    :Email: daniel.mohr@dlr.de
    :Date: 2021-07-29 (last change).

    Analyse the data structure of the given path.

    :param path_name: directory path to analyse
    :param result: optional dict to which the results are added;
                   existing keys may be overridden
    """
    # pylint: disable=too-many-branches
    if result is None:
        result = dict()
    file_names = []  # files directly in path_name
    dir_names = []  # directories directly in path_name
    all_file_names = []  # all other files in the directory tree
    for (dirpath, dirs, filenames) in os.walk(path_name):
        if os.path.samefile(dirpath, path_name):
            for file_name in filenames:
                file_names.append(file_name)
            dir_names.extend(dirs)
        else:
            for file_name in filenames:
                all_file_names.append(os.path.join(dirpath, file_name))
    analysed_file_names = []
    # find README, LICENSE, MANIFEST
    for key in ['readme', 'license', 'manifest']:
        res = check_file_available(file_names, key)
        if res is not None:
            result[key] = res
            analysed_file_names.append(res)
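    # Illustrative example: with file_names == ['README.md', 'LICENSE.txt',
    # 'data.csv'] the loop above sets result['readme'] = 'README.md' and
    # result['license'] = 'LICENSE.txt'.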
    # analyse if directory is a repository
    for dirname in dir_names:
        if ((dirname in ['.git', '.bzr']) and
                os.path.isdir(os.path.join(path_name, dirname))):
            # assume repository
            result['repository'] = dirname
            analysed_file_names.append(dirname)
            add_append_integrate_data(
                result, 'data integrity control', 'repository')
            break
    # analyse if checksums are available (look for checksums)
    regexp = re.compile(
        r'.*checksum.*|.*\.md5|.*\.sha256|.*\.sha512|.*\.sha1',
        flags=re.IGNORECASE)
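    # Illustrative examples: 'CHECKSUMS.txt', 'data.md5' and 'results.sha256'
    # match this pattern (case-insensitive); 'notes.txt' does not.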
    for filename in file_names:
        if regexp.findall(filename):
            result['checksum file'] = filename
            analysed_file_names.append(filename)
            add_append_integrate_data(
                result, 'data integrity control', 'checksums')
            break
    result['data'] = list(set(file_names).difference(analysed_file_names))
    result['data'] += all_file_names
    if not bool(result['data']):
        del result['data']
    # result['author'] = [{'name': 'foo', 'email': 'bar'},
    #                     {'name': 'a', 'email': 'b'}]
    return result
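

# Illustrative usage sketch (a minimal example, not a definitive interface):
#
#     import json
#     result = analyse_data_structure('.')
#     print(json.dumps(result, indent=2))
#
# The returned dict may contain keys such as 'readme', 'license', 'manifest',
# 'repository', 'checksum file', 'data integrity control' and 'data'
# (the files not covered by the other keys).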