1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

""" 

:Author: Daniel Mohr 

:Email: daniel.mohr@dlr.de 

:Date: 2021-02-08 (last change). 

:License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007. 

""" 

 

import datetime 

import re 

import time 

 

 

def date_rdate2isoformat(data):
    """
    :Author: Daniel Mohr
    :Email: daniel.mohr@dlr.de
    :Date: 2021-02-08 (last change).

    Convert a whitespace separated "year month day" string to ISO format.

    Only the first three whitespace separated fields of *data* are used;
    any further fields are ignored.

    :param data: string starting with the integers year, month and day
    :return: the date as string in the form ``YYYY-MM-DD``
    :raises IndexError: if *data* has fewer than three fields
    :raises ValueError: if a field is not an integer or the three numbers
                        do not form a valid calendar date
    """
    fields = data.split()
    year = int(fields[0])
    month = int(fields[1])
    day = int(fields[2])
    return datetime.date(year, month, day).isoformat()

 

 

# Number of lines read from the start of the file; the checks below only
# look at these lines.
_NASA_AMES_HEADER_LINES = 7

# A text field is accepted if its raw line (as returned by readline, i.e.
# including a trailing line ending if present) is shorter than this.
_NASA_AMES_RAW_LINE_LIMIT = 132 + 1

# One "date" in the DATE RDATE line: 4 digits, then twice 1-2 spaces followed
# by 1-2 digits. This is only a textual match and does not guarantee a valid
# calendar date.
_NASA_AMES_DATE = r'([0-9]{4}[ ]{1,2}[0-9]{1,2}[ ]{1,2}[0-9]{1,2})'


def _nasa_ames_record(report, level, message):
    """Append '<level>: <message>' to the log and count it under *level*.

    :param report: dict with the keys 'error', 'warning' and 'log'
    :param level: 'error' or 'warning'
    :param message: text of the log entry (without the level prefix)
    """
    report['log'] += [level + ': ' + message]
    report[level] += 1


def _nasa_ames_int_pair(line):
    """Return the two integers found in *line* or None.

    None is returned if *line* does not consist of exactly two whitespace
    separated fields or if one of the fields is not an integer.
    """
    fields = line.strip().split()
    if len(fields) != 2:
        return None
    try:
        return int(fields[0]), int(fields[1])
    except ValueError:
        return None


def _nasa_ames_dates(line, report, found):
    """Extract DATE and optionally RDATE from *line* into *found*.

    Problems are recorded as warnings in *report*.
    """
    text = line.strip()
    dates = re.findall(_NASA_AMES_DATE, text)
    if len(dates) > 2:
        _nasa_ames_record(
            report, 'warning', 'too many "dates" in DATE RDATE')
        return
    if len(dates) == 1:
        candidates = dates
    elif len(dates) == 2:
        # both dates are only accepted if nothing but white space is
        # between them
        pairs = re.findall(
            _NASA_AMES_DATE + r'\s*' + _NASA_AMES_DATE, text)
        candidates = pairs[0] if pairs else ()
    else:
        candidates = ()
    try:
        iso_dates = [date_rdate2isoformat(item) for item in candidates]
    except ValueError:
        # the text looked like a date but is not a calendar date
        # (e.g. month 13); report it instead of crashing
        iso_dates = []
    if not iso_dates:
        _nasa_ames_record(report, 'warning', 'do not understand DATE RDATE')
        return
    # one date gives only DATE, two dates give DATE and RDATE
    found.update(zip(('DATE', 'RDATE'), iso_dates))


def _nasa_ames_analyse(lines, report, found):
    """Check the header *lines*; fill *report* and *found* in place.

    The analysis stops at the first error, warnings do not stop it.

    :param lines: list of the first 7 lines of the file ('' marks a line
                  behind the end of the file)
    :param report: dict with the keys 'error', 'warning' and 'log'
    :param found: dict collecting the extracted metadata
    """
    # NLHEAD: Number of lines in file header
    # FFI: File format index
    nlhead_ffi = _nasa_ames_int_pair(lines[0])
    if nlhead_ffi is None:
        _nasa_ames_record(
            report, 'error',
            'no nasa ames format detected (cannot analyse first line)')
        return
    found['NLHEAD'], found['FFI'] = nlhead_ffi

    # IVOL: Number of the file in the above dataset
    #       (between 1 and NVOL).
    # NVOL: Total number of files belonging to the considered
    #       dataset (i.e. with same ONAME, ORG, SNAME, MNAME).
    if not lines[5]:
        _nasa_ames_record(report, 'error', 'IVOL and NVOL not found')
        return
    ivol_nvol = _nasa_ames_int_pair(lines[5])
    if ivol_nvol is None:
        _nasa_ames_record(report, 'error', 'cannot extract IVOL and NVOL')
        return
    ivol, nvol = ivol_nvol
    if not 1 <= ivol <= nvol:
        _nasa_ames_record(report, 'error', 'do not understand IVOL and NVOL')
        return
    found['IVOL'] = ivol
    found['NVOL'] = nvol

    # ONAME: List of author(s) in the format Lastname,
    #        Firstname; separated by an arbitrary character
    #        (for example, a hyphen or a semi-colon).
    # since it is hard to automatic split at an arbitrary
    # character, we only check for a comma
    oname = lines[1]
    if not oname:
        _nasa_ames_record(report, 'warning', 'ONAME is empty')
    elif not len(oname) < _NASA_AMES_RAW_LINE_LIMIT:
        _nasa_ames_record(report, 'warning', 'ONAME too long')
    elif ',' not in oname:
        _nasa_ames_record(
            report, 'warning', 'do not understand ONAME format')
    else:
        found['ONAME'] = oname.strip()

    # ORG: Organisation name (university, institute, etc).
    #      May include address and phone numbers.
    # SNAME: Source of data, i.e. instrument, platform, model name,
    #        etc.
    # MNAME: Name of mission, campaign, programme and/or project.
    for (pos, tag) in [(2, 'ORG'), (3, 'SNAME'), (4, 'MNAME')]:
        line = lines[pos]
        if not line:
            # line is behind the end of the file: silently skipped
            continue
        if len(line) < _NASA_AMES_RAW_LINE_LIMIT:
            found[tag] = line.strip()
        else:
            _nasa_ames_record(report, 'warning', tag + ' too long')

    if lines[6]:
        _nasa_ames_dates(lines[6], report, found)


def check_nasa_ames_format(filename, output_format='human_readable'):
    """
    :Author: Daniel Mohr
    :Email: daniel.mohr@dlr.de
    :Date: 2021-02-08 (last change).

    Checks the given file for the nasa ames format, see:

    * http://cedadocs.ceda.ac.uk/73/
    * http://cedadocs.ceda.ac.uk/73/4/index.html
    * http://cedadocs.ceda.ac.uk/73/4/FFI-summary.html

    Only the first 7 lines of the file are read. A first line or an
    IVOL/NVOL line which does not consist of exactly two integers is
    reported as error in the result (it does not raise an exception).
    A DATE RDATE line which looks like dates but holds no valid calendar
    date is reported as warning.

    :param filename: file to analyse
    :param output_format: if not equal to 'human_readable' the extracted
                          metadata (NLHEAD, FFI, IVOL, NVOL, ONAME, ORG,
                          SNAME, MNAME, DATE, RDATE as far as found) are
                          added to the result
    :return: dict with the single key 'pydabu (nasa ames format check)';
             its value is a dict with the keys 'error' (number of errors),
             'warning' (number of warnings), 'log' (list of messages),
             'created' (time stamp from time.time()) and possibly the
             extracted metadata
    """
    checker_name = 'pydabu (nasa ames format check)'
    report = {'error': 0, 'warning': 0, 'log': []}
    found = {}
    with open(filename, mode='r') as fd:
        # readline returns '' behind the end of the file, therefore we
        # always get exactly _NASA_AMES_HEADER_LINES entries
        header = [fd.readline() for _ in range(_NASA_AMES_HEADER_LINES)]
    _nasa_ames_analyse(header, report, found)
    report['created'] = time.time()
    if output_format != 'human_readable':
        report.update(found)
    return {checker_name: report}