1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

""" 

:Author: Daniel Mohr 

:Email: daniel.mohr@dlr.de 

:Date: 2021-03-19 (last change). 

:License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007. 

""" 

 

import copy
import json

from dabu.compare_json_schemas import compare_json_schemas

from .get_graph_item import get_graph_item

 

 

def _are_types_equal(type1, type2):
    """
    :Author: Daniel Mohr
    :Date: 2021-03-19

    Check whether two JSON schema fragments compare as equal.

    The comparison itself is delegated to ``compare_json_schemas``; an
    ``AssertionError`` raised by that call is interpreted as "not equal".
    Any other exception is propagated unchanged.

    :param type1: first schema fragment
    :param type2: second schema fragment
    :return: ``True`` if ``compare_json_schemas`` raised no
             ``AssertionError``, otherwise ``False``
    """
    try:
        compare_json_schemas(type1, type2)
    except AssertionError:
        return False
    return True

 

 

def _schema2json_entry(value, mapped):
    """
    Build the JSON schema entry for one schema.org data type.

    :param value: URL stored under the key ``"@id"``
    :param mapped: value from the ``schema2json`` mapping; either a string
                   (stored under ``"type"``) or a dict whose items are
                   copied into the entry
    :return: a new dict that shares no mutable object with ``mapped``
    """
    entry = {"@id": value}
    if isinstance(mapped, str):
        entry["type"] = mapped
    else:  # dict
        for key in mapped:
            # deep copy: nested lists (e. g. "oneOf") must not be shared
            # with the mapping, because the entry may be extended later
            entry[key] = copy.deepcopy(mapped[key])
    return entry


def create_properties_schema2json(
        properties, schema2json, prop_name, word, item_type):
    """
    :Author: Daniel Mohr
    :Date: 2021-03-19

    Add the JSON schema for a schema.org data type to a property.

    The entry consists of ``"@id": "https://schema.org/" + word`` and the
    content of ``schema2json[item_type]``. It is stored in ``properties``
    (which is modified in place) as follows:

    * ``prop_name`` is not yet present: the entry becomes
      ``properties[prop_name]``.
    * ``properties[prop_name]`` is a dict without ``"oneOf"``: nothing is
      done if it already has the same ``"@id"``; otherwise the existing
      dict and the new entry are wrapped in ``{"oneOf": [...]}``.
    * ``properties[prop_name]`` has ``"oneOf"``: the entry is appended to
      that list unless ``_are_types_equal`` reports an equal item.

    :param properties: dict mapping property names to JSON schema
    :param schema2json: dict mapping schema.org data type names to a JSON
                        schema type name (str) or a schema fragment (dict);
                        it is not modified
    :param prop_name: key in ``properties`` to create or extend
    :param word: schema.org term used to build the ``"@id"`` URL
    :param item_type: key in ``schema2json`` selecting the data type
    :raises NotImplementedError: if ``properties[prop_name]`` exists and
                                 is not a dict
    :raises KeyError: if an entry has to be built and ``item_type`` is not
                      in ``schema2json``
    """
    value = "https://schema.org/" + word
    if prop_name not in properties:
        properties[prop_name] = _schema2json_entry(
            value, schema2json[item_type])
        return
    existing = properties[prop_name]
    if not isinstance(existing, dict):
        raise NotImplementedError(json.dumps(properties, indent=2))
    if "oneOf" not in existing:
        if ("@id" in existing) and (existing["@id"] == value):
            return  # this type is already the only entry
        entry = _schema2json_entry(value, schema2json[item_type])
        properties[prop_name] = {"oneOf": [existing, entry]}
        return
    # existing["oneOf"] is list
    entry = _schema2json_entry(value, schema2json[item_type])
    for known in existing["oneOf"]:
        if _are_types_equal(entry, known):
            return
    existing["oneOf"].append(entry)

 

 

def create_properties_handle(
        properties, prop_name, missing_words, item_type_ref):
    """
    :Author: Daniel Mohr
    :Date: 2021-03-17

    Add a reference to a definition to a property.

    The reference is ``{"$ref": "#/definitions/" + item_type_ref}``. It is
    stored in ``properties`` (which is modified in place) as follows:

    * ``prop_name`` is not yet present: the reference becomes
      ``properties[prop_name]``.
    * ``properties[prop_name]`` is a dict without ``"oneOf"``: the
      existing dict and the reference are wrapped in ``{"oneOf": [...]}``.
    * ``properties[prop_name]`` has ``"oneOf"``: the reference is appended
      to that list.

    If the same reference is already present, nothing is changed at all.
    Otherwise ``item_type_ref`` is appended to ``missing_words``.

    :param properties: dict mapping property names to JSON schema
    :param prop_name: key in ``properties`` to create or extend
    :param missing_words: list collecting the names of referenced
                          definitions; extended in place
    :param item_type_ref: name of the definition to reference
    :raises NotImplementedError: if ``properties[prop_name]`` exists and
                                 is not a dict
    """
    value = "#/definitions/" + item_type_ref
    reference = {"$ref": value}
    if prop_name not in properties:
        properties[prop_name] = reference
    else:
        existing = properties[prop_name]
        if not isinstance(existing, dict):
            raise NotImplementedError(json.dumps(properties, indent=2))
        if "oneOf" not in existing:
            if ("$ref" in existing) and (existing["$ref"] == value):
                return  # already referenced, nothing to record
            properties[prop_name] = {"oneOf": [existing, reference]}
        else:  # existing["oneOf"] is list
            for known in existing["oneOf"]:
                if ("$ref" in known) and (known["$ref"] == value):
                    return  # already referenced, nothing to record
            existing["oneOf"].append(reference)
    missing_words.append(item_type_ref)

 

 

def _rangeincludes_list(data, schema2json, handle):
    """
    :Author: Daniel Mohr
    :Date: 2021-03-17

    Collect the known types from a list valued ``schema:rangeIncludes``.

    For every item of ``data["schema:rangeIncludes"]`` having an ``"@id"``
    the part following ``schema:`` is taken as type name. The name is
    accepted if it is contained in ``schema2json`` or in ``handle``.
    Items without ``"@id"`` and ids without the ``schema:`` prefix are
    skipped.

    :param data: graph item of a property
    :param schema2json: container of data type names
    :param handle: container of names of types handled as reference
    :return: list of accepted type names in the order of appearance, or
             ``None`` if ``data`` has no ``schema:rangeIncludes`` or its
             value is not a list
    """
    if "schema:rangeIncludes" not in data:
        return None
    range_includes = data["schema:rangeIncludes"]
    if not isinstance(range_includes, list):
        return None
    accept_list = []
    for item in range_includes:
        if "@id" not in item:
            continue
        parts = item["@id"].split('schema:')
        if len(parts) < 2:
            # no "schema:" prefix: cannot name a type known here
            continue
        item_type = parts[1]
        if (item_type in schema2json) or (item_type in handle):
            accept_list.append(item_type)
    return accept_list

 

 

def _get_property(item, data, properties, prop_name,
                  missing_words, draft='draft-04'):
    """
    :Author: Daniel Mohr
    :Date: 2021-03-19

    Store the JSON schema for one range entry of a schema.org property.

    ``properties[prop_name]`` is created or extended by the helpers
    ``create_properties_schema2json`` (data types) and
    ``create_properties_handle`` (references to definitions). The first
    matching rule is applied:

    1. The name of ``data`` itself is a known data type.
    2. ``data["schema:rangeIncludes"]`` is a single node naming a known
       data type (e. g. faxNumber).
    3. ``item`` names a type which is handled as reference.
    4. ``data["schema:rangeIncludes"]`` is a single node naming a type
       which is handled as reference (e. g. follows).
    5. ``data["schema:rangeIncludes"]`` is a list: every known type of
       that list is added.

    Otherwise nothing is done (not implemented now).

    :param item: one node of ``schema:rangeIncludes``; needs the key
                 ``"@id"`` as soon as rule 3 is evaluated
    :param data: graph item of the property; needs the key ``"@id"``
    :param properties: dict mapping property names to JSON schema;
                       modified in place
    :param prop_name: key in ``properties`` to create or extend
    :param missing_words: list collecting the names of referenced
                          definitions; extended in place
    :param draft: JSON schema draft; ``'draft-06'``, ``'draft-07'`` and
                  ``'2019-09'`` change the formats used for some data
                  types, every other value uses the default mapping
    """
    # pylint: disable=too-many-arguments,too-many-branches
    # schema.org datatypes to handle ("schema:DataType"):
    schema2json = {"Text": "string",
                   "Boolean": "boolean",
                   "Integer": "integer",
                   "Number": "number",
                   "Float": "number",
                   "URL": {"type": "string", "format": "uri"},
                   "DateTime": {"type": "string", "format": "date-time"},
                   "Date": {"type": "string", "format": "date-time"},
                   "Time": "string",
                   "email": {"oneOf": [{"type": "string"},
                                       {"type": "string", "format": "email"}]}}
    if draft in ['draft-06']:
        schema2json["URL"] = {
            "oneOf": [{"type": "string", "format": "uri"},
                      {"type": "string", "format": "uri-reference"}]}
    elif draft in ['draft-07', '2019-09']:
        schema2json["Date"] = {"type": "string", "format": "date"}
        # NOTE(review): "datetime" does not look like a format defined by
        # JSON schema draft-07 ("date-time", "date", "time") -- confirm
        # whether "time" was intended before changing the generated schema.
        schema2json["Time"] = {"type": "string", "format": "datetime"}
        schema2json["email"] = {
            "oneOf": [{"type": "string"},
                      {"type": "string", "format": "email"},
                      {"type": "string", "format": "idn-email"}]}
        schema2json["URL"] = {
            "oneOf": [{"type": "string", "format": "uri"},
                      {"type": "string", "format": "uri-reference"},
                      {"type": "string", "format": "iri"},
                      {"type": "string", "format": "iri-reference"}]}
    # schema.org types to handle as reference to a definition
    # (only membership is tested):
    handle = frozenset([
        "ImageObject", "MediaObject",
        "Distance", "CreativeWork",
        "DefinedTerm",
        "Thing", "Person",
        "Place", "Comment",
        "DataCatalog", "Dataset", "DataDownload",
        "Organization", "AdministrativeArea", "VirtualLocation",
        "EducationalOrganization", "Photograph", "CorrectionComment",
        "AudioObject", "Brand", "ContactPoint", "Language", "Occupation",
        "hasOfferCatalog", "OfferCatalog", "InteractionCounter", "Offer",
        "ProgramMembership", "Country", "MonetaryAmount",
        "PriceSpecification", "OwnershipInfo", "Product", "Event",
        "Demand", "QuantitativeValue", "PropertyValue"])
    word = data["@id"].split('schema:')[1]
    if word in schema2json:
        create_properties_schema2json(
            properties, schema2json, prop_name, word, word)
        return
    # type named by a single (non list) rangeIncludes node, if any
    range_type = None
    if (("schema:rangeIncludes" in data) and
            ("@id" in data["schema:rangeIncludes"])):
        range_type = data["schema:rangeIncludes"]["@id"].split('schema:')[1]
    if range_type in schema2json:
        # e. g.: faxNumber
        create_properties_schema2json(
            properties, schema2json, prop_name, word, range_type)
        return
    item_type = item["@id"].split('schema:')[1]
    if item_type in handle:
        create_properties_handle(
            properties, prop_name, missing_words, item_type)
        return
    if range_type in handle:
        # e. g.: follows
        create_properties_handle(
            properties, prop_name, missing_words, range_type)
        return
    accept_list = _rangeincludes_list(data, schema2json, handle)
    # None (no list valued range) and [] (nothing known) add nothing:
    # not implemented now
    for accepted in accept_list or ():
        if accepted in schema2json:
            create_properties_schema2json(
                properties, schema2json, prop_name, word, accepted)
        elif accepted in handle:
            create_properties_handle(
                properties, prop_name, missing_words, accepted)

 

 

def get_property(schemaorg_data, properties, prop_name, missing_words,
                 draft='draft-04'):
    """
    :Author: Daniel Mohr
    :Date: 2021-03-19

    Create the JSON schema for the schema.org term ``prop_name``.

    The graph item of the term is looked up with ``get_graph_item``:

    * An item of type ``rdfs:Class`` becomes a reference to the
      definition of that class; the class name is appended to
      ``missing_words``.
    * An item of type ``rdf:Property`` is translated by ``_get_property``,
      once for a dict valued ``schema:rangeIncludes`` and once per item
      for a list valued one.

    :param schemaorg_data: schema.org data passed on to ``get_graph_item``
    :param properties: dict mapping property names to JSON schema;
                       modified in place
    :param prop_name: term to look up; also the key used in ``properties``
    :param missing_words: list collecting the names of referenced
                          definitions; extended in place
    :param draft: JSON schema draft, passed on to ``_get_property``
    :return: always ``None``
    :raises NotImplementedError: if the item is neither a class nor a
                                 property, or if its
                                 ``schema:rangeIncludes`` is missing, is
                                 a dict without ``"@id"`` or is neither a
                                 dict nor a list
    """
    data = get_graph_item(schemaorg_data, prop_name)
    node_type = data["@type"] if "@type" in data else None
    if node_type == "rdfs:Class":
        new_missing_word = data["@id"].split('schema:')[1]
        properties[prop_name] = {"$ref": "#/definitions/" + new_missing_word}
        missing_words.append(new_missing_word)
        return None
    if (node_type != "rdf:Property") or ("schema:rangeIncludes" not in data):
        raise NotImplementedError(json.dumps(data, indent=2))
    range_includes = data["schema:rangeIncludes"]
    if isinstance(range_includes, dict):
        if "@id" not in range_includes:
            raise NotImplementedError(json.dumps(data, indent=2))
        _get_property(
            range_includes, data,
            properties, prop_name, missing_words, draft)
    elif isinstance(range_includes, list):
        for item in range_includes:
            _get_property(
                item, data, properties, prop_name,
                missing_words, draft)
    else:
        raise NotImplementedError(json.dumps(data, indent=2))
    return None