1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

""" 

:Author: Daniel Mohr 

:Email: daniel.mohr@dlr.de 

:Date: 2021-02-17, 2021-07-29 (last change). 

:License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007. 

""" 

 

import os 

import re 

 

 

def check_file_available(files, key): 

""" 

:Author: Daniel Mohr 

:Email: daniel.mohr@dlr.de 

:Date: 2021-01-19 (last change). 

""" 

res = None 

key = key.lower() 

for filename in files: 

if filename.lower().startswith(key): 

res = filename 

break 

return res 

 

 

def add_append_integrate_data(store, key, data): 

""" 

:Author: Daniel Mohr 

:Email: daniel.mohr@dlr.de 

:Date: 2021-02-04 (last change). 

""" 

33 ↛ 35line 33 didn't jump to line 35, because the condition on line 33 was never true if ((key in store) and 

store[key] != data): 

if isinstance(store[key], (list, tuple)): 

if data not in store[key]: 

store[key].append(data) 

else: 

store[key] = [ 

store[key], 

data] 

else: 

store[key] = data 

 

 

def analyse_data_structure(path_name='.', result=None): 

""" 

:Author: Daniel Mohr 

:Email: daniel.mohr@dlr.de 

:Date: 2021-07-29 (last change). 

 

Analyse the data structure of the given path. 

 

:param path_name: directory path to analyse 

:param result: you can give a dict, where the results are appended 

or overridden 

""" 

# pylint: disable=too-many-branches 

59 ↛ 61line 59 didn't jump to line 61, because the condition on line 59 was never false if result is None: 

result = dict() 

file_names = [] # only files in the actual directory 

dir_names = [] # only directory in the actual directory 

all_file_names = [] # all other files in the directory tree 

for (dirpath, dirs, filenames) in os.walk(path_name): 

if os.path.samefile(dirpath, '.'): 

for file_name in filenames: 

file_names.append(file_name) 

dir_names.append(dirs) 

else: 

for file_name in filenames: 

all_file_names.append(os.path.join(dirpath, file_name)) 

analysed_file_names = [] 

# find README, LICENSE, MANIFEST 

for key in ['readme', 'license', 'manifest']: 

res = check_file_available(file_names, key) 

if res is not None: 

result[key] = res 

analysed_file_names.append(res) 

# analyse if directory is a repository 

for dirname in dir_names: 

81 ↛ 83line 81 didn't jump to line 83, because the condition on line 81 was never true if (dirname in ['.git', '.bzr']) and os.path.isdir(dirname): 

# assume repository 

result['repository'] = dirname 

analysed_file_names.append(dirname) 

add_append_integrate_data( 

result, 'data integrity control', 'repository') 

break 

# analyse if checksums are available (look for checksums) 

regexp = re.compile( 

r'.*checksum.*|.*\.md5|.*\.sha256|.*\.sha512|.*\.sha1', 

flags=re.IGNORECASE) 

for filename in file_names: 

if regexp.findall(filename): 

result['checksum file'] = filename 

analysed_file_names.append(filename) 

add_append_integrate_data( 

result, 'data integrity control', 'checksums') 

break 

result['data'] = list(set(file_names).difference(analysed_file_names)) 

result['data'] += all_file_names 

if not bool(result['data']): 

del result['data'] 

# result['author'] = [{'name': 'foo', 'email': 'bar'}, 

# {'name': 'a', 'email': 'b'}] 

return result