Coverage for /home/runner/.local/lib/python3.8/site-packages/dabu/check_nasa_ames_format/check_nasa_ames_format.py: 65%

"""

:Author: Daniel Mohr

:Email: daniel.mohr@dlr.de

:Date: 2021-02-08 (last change).

:License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.

"""

import datetime

import re

import time

def date_rdate2isoformat(data):

"""

:Author: Daniel Mohr

:Email: daniel.mohr@dlr.de

:Date: 2021-02-08 (last change).

"""

splited = data.split()

return datetime.date(int(splited[0]),

int(splited[1]),

int(splited[2])).isoformat()

def check_nasa_ames_format(filename, output_format='human_readable'):

"""

:Author: Daniel Mohr

:Email: daniel.mohr@dlr.de

:Date: 2021-02-08 (last change).

Checks the given file for the nasa ames format, see:

* http://cedadocs.ceda.ac.uk/73/

* http://cedadocs.ceda.ac.uk/73/4/index.html

* http://cedadocs.ceda.ac.uk/73/4/FFI-summary.html

:param filename: file to analyse

"""

# pylint: disable=too-many-locals,too-many-branches,too-many-statements

result = dict()

checker_name = 'pydabu (nasa ames format check)'

addresult = dict()

result[checker_name] = dict()

result[checker_name]['error'] = 0

result[checker_name]['warning'] = 0

result[checker_name]['log'] = []

metadata_part = []

with open(filename, mode='r') as fd:

# pylint: disable=unused-variable

for i in range(7):

metadata_part += [fd.readline()]

52 ↛ 153line 52 didn't jump to line 153, because the condition on line 52 was never false if len(metadata_part) == 7:

nlhead_ffi = metadata_part[0].strip().split()

54 ↛ 59line 54 didn't jump to line 59, because the condition on line 54 was never false if isinstance(nlhead_ffi, list) and len(nlhead_ffi) == 2:

# NLHEAD: Number of lines in file header

# FFI: File format index

addresult['NLHEAD'], addresult['FFI'] = map(int, nlhead_ffi)

else:

result[checker_name]['log'] += [

'error: '

'no nasa ames format detected (cannot analyse first line)']

result[checker_name]['error'] += 1

63 ↛ 87line 63 didn't jump to line 87, because the condition on line 63 was never false if result[checker_name]['error'] == 0:

64 ↛ 84line 64 didn't jump to line 84, because the condition on line 64 was never false if bool(metadata_part[5]): # len(metadata_part[5]) > 0

ivol_nvol = metadata_part[5].strip().split()

66 ↛ 80line 66 didn't jump to line 80, because the condition on line 66 was never false if isinstance(ivol_nvol, list) and len(ivol_nvol) == 2:

# IVOL: Number of the file in the above dataset

# (between 1 and NVOL).

# NVOL: Total number of files belonging to the considered

# dataset (i.e. with same ONAME, ORG, SNAME, MNAME).

ivol, nvol = map(int, ivol_nvol)

72 ↛ 76line 72 didn't jump to line 76, because the condition on line 72 was never false if 1 <= ivol <= nvol:

addresult['IVOL'] = ivol

addresult['NVOL'] = nvol

else:

result[checker_name]['log'] += [

'error: do not understand IVOL and NVOL']

result[checker_name]['error'] += 1

else:

result[checker_name]['log'] += [

'error: cannot extract IVOL and NVOL']

result[checker_name]['error'] += 1

else:

result[checker_name]['log'] += [

'error: IVOL and NVOL not found']

result[checker_name]['error'] += 1

87 ↛ 153line 87 didn't jump to line 153, because the condition on line 87 was never false if result[checker_name]['error'] == 0:

88 ↛ 106line 88 didn't jump to line 106, because the condition on line 88 was never false if bool(metadata_part[1]): # len(metadata_part[1]) > 0

89 ↛ 102line 89 didn't jump to line 102, because the condition on line 89 was never false if len(metadata_part[1]) < 132 + 1:

# ONAME: List of author(s) in the format Lastname,

# Firstname; separated by an arbitrary character

# (for example, a hyphen or a semi-colon).

# since it is hard to automatic split at an arbitrary

# character, we only check for a comma

95 ↛ 98line 95 didn't jump to line 98, because the condition on line 95 was never false if ',' in metadata_part[1]:

addresult['ONAME'] = metadata_part[1].strip()

else:

result[checker_name]['log'] += [

'warning: do not understand ONAME format']

result[checker_name]['warning'] += 1

else:

result[checker_name]['log'] += [

'warning: ONAME too long']

result[checker_name]['warning'] += 1

else:

result[checker_name]['log'] += [

'warning: ONAME is empty']

result[checker_name]['warning'] += 1

for (pos, tag) in [(2, 'ORG'), (3, 'SNAME'), (4, 'MNAME')]:

# ORG: Organisation name (university, institute, etc).

# May include address and phone numbers.

# SNAME: Source of data, i.e. instrument, platform, model name,

# etc.

# MNAME: Name of mission, campaign, programme and/or project.

# NVOL: Total number of files belonging to the considered

# dataset (i.e. with same ONAME, ORG, SNAME, MNAME).

117 ↛ 109line 117 didn't jump to line 109, because the condition on line 117 was never false if bool(metadata_part[pos]): # len(metadata_part[pos]) > 0

118 ↛ 121line 118 didn't jump to line 121, because the condition on line 118 was never false if len(metadata_part[pos]) < 132 + 1:

addresult[tag] = metadata_part[pos].strip()

else:

result[checker_name]['log'] += [

'warning: ' + tag + ' too long']

result[checker_name]['warning'] += 1

124 ↛ 153line 124 didn't jump to line 153, because the condition on line 124 was never false if bool(metadata_part[6]): # len(metadata_part[6]) > 0

date_rdate = re.findall(

r'([0-9]{4}[ ]{1,2}[0-9]{1,2}[ ]{1,2}[0-9]{1,2})',

metadata_part[6].strip())

128 ↛ 129line 128 didn't jump to line 129, because the condition on line 128 was never true if len(date_rdate) > 2:

result[checker_name]['log'] += [

'warning: too many "dates" in DATE RDATE']

result[checker_name]['warning'] += 1

132 ↛ 133line 132 didn't jump to line 133, because the condition on line 132 was never true elif len(date_rdate) == 1:

addresult['DATE'] = date_rdate2isoformat(date_rdate[0])

134 ↛ 150line 134 didn't jump to line 150, because the condition on line 134 was never false elif len(date_rdate) == 2:

date_rdate = re.findall(

r'([0-9]{4}[ ]{1,2}[0-9]{1,2}[ ]{1,2}[0-9]{1,2})'

r'\s*'

r'([0-9]{4}[ ]{1,2}[0-9]{1,2}[ ]{1,2}[0-9]{1,2})',

metadata_part[6].strip())

140 ↛ 146line 140 didn't jump to line 146, because the condition on line 140 was never false if date_rdate:

addresult['DATE'] = date_rdate2isoformat(

date_rdate[0][0])

addresult['RDATE'] = date_rdate2isoformat(

date_rdate[0][1])

else:

result[checker_name]['log'] += [

'warning: do not understand DATE RDATE']

result[checker_name]['warning'] += 1

else:

result[checker_name]['log'] += [

'warning: do not understand DATE RDATE']

result[checker_name]['warning'] += 1

result[checker_name]['created'] = time.time()

154 ↛ 157line 154 didn't jump to line 157, because the condition on line 154 was never false if output_format != 'human_readable':

for key in addresult:

result[checker_name][key] = addresult[key]

return result