Skip to content

Commit 1d1600a

Browse files
authored
Add JSON loader (#296)
1 parent 455d725 commit 1d1600a

File tree

7 files changed

+108
-0
lines changed

7 files changed

+108
-0
lines changed

pardata/_high_level.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ def load_dataset_from_location(url_or_path: Union[str, typing_.PathLike], *,
251251
RegexFormatPair(regex=r'.*\.(txt|log)', format='text/plain'),
252252
RegexFormatPair(regex=r'.*\.(jpg|jpeg)', format='image/jpeg'),
253253
RegexFormatPair(regex=r'.*\.png', format='image/png'),
254+
RegexFormatPair(regex=r'.*\.json', format='json'),
254255
)
255256

256257
for regex_format_pair in regex_format_pairs:

pardata/loaders/_format_loader_map.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from .image import PillowLoader
3030
from .text import PlainTextLoader
3131
from .table import CSVPandasLoader
32+
from .json import JSONLoader
3233

3334

3435
class FormatLoaderMap:
@@ -79,6 +80,7 @@ def __contains__(self, fmt: str) -> bool:
7980
'image/jpeg': PillowLoader(),
8081
'image/png': PillowLoader(),
8182
'audio/wav': WaveLoader(),
83+
'json': JSONLoader(),
8284
})
8385

8486

pardata/loaders/_json.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#
2+
# Copyright 2021 IBM Corp. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
"JSON file loaders."
18+
19+
20+
from typing import cast, Dict, Union, Any
21+
import json
22+
23+
from .. import typing as typing_
24+
from ..schema import SchemaDict
25+
from ._base import Loader
26+
27+
28+
class JSONLoader(Loader):
    """Loads a JSON file to an object representing the data."""

    def load(self, path: Union[typing_.PathLike, Dict[str, str]], options: SchemaDict) -> Any:
        """Read the JSON document stored at ``path`` and return the decoded Python object.

        The file is always decoded as UTF-8 (the encoding mandated for JSON interchange) instead of the
        platform's locale encoding, so documents containing non-ASCII text load identically on every system.

        :param path: The path to the JSON file.
        :param options: Not used by the JSON loader.
        :raises TypeError: ``path`` is not a path-like object.
        :return: An object representing loaded data. See :func:`json.load` for details.
        """

        # NOTE(review): presumably the base class validates ``path`` here (source of the documented
        # TypeError) -- confirm against ``Loader.load``.
        super().load(path, options)

        # We can remove usage of cast once Dict[str, str] handling is added
        path = cast(typing_.PathLike, path)

        # Explicit encoding: without it ``open`` falls back to the locale encoding (e.g. cp1252 on Windows),
        # which mis-decodes or rejects valid UTF-8 JSON.
        with open(path, encoding='utf-8') as json_file:
            return json.load(json_file)

pardata/loaders/json.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#
2+
# Copyright 2021 IBM Corp. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
"JSON loaders."
18+
19+
20+
from ._json import JSONLoader
21+
22+
__all__ = ('JSONLoader',)

tests/assets/people.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[
2+
{
3+
"name": "Alice",
4+
"age": 25,
5+
"state": "California"
6+
},
7+
{
8+
"name": "Bob",
9+
"age": 21,
10+
"state": "Florida"
11+
},
12+
{
13+
"name": "Carol",
14+
"age": 28,
15+
"state": "Texas"
16+
}
17+
]

tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,3 +409,10 @@ def bell_sound(asset_dir) -> Path:
409409
"Path to the service-bell.wav."
410410

411411
return asset_dir / 'service-bell.wav'
412+
413+
414+
@pytest.fixture
def people_json(asset_dir) -> Path:
    "Location of the people.json test asset inside the asset directory."

    json_asset = asset_dir / 'people.json'
    return json_asset

tests/test_loaders.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pandas.api.types import is_datetime64_any_dtype, is_float_dtype, is_integer_dtype, is_string_dtype
2323
from PIL import Image, ImageChops
2424
import wave
25+
import json
2526

2627
from pardata.dataset import Dataset
2728
from pardata.loaders import Loader
@@ -31,6 +32,7 @@
3132
from pardata.loaders.image import PillowLoader
3233
from pardata.loaders.text import PlainTextLoader
3334
from pardata.loaders.table import CSVPandasLoader
35+
from pardata.loaders.json import JSONLoader
3436

3537

3638
class TestBaseLoader:
@@ -339,3 +341,15 @@ def test_csv_pandas_header(self, tmp_path, noaa_jfk_schema):
339341

340342
del noaa_jfk_schema['subdatasets']['jfk_weather_cleaned']['format']['options']['no_header']
341343
self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)
344+
345+
346+
class TestJSONLoaders:
    def test_json_loader(self, people_json):
        "Check that JSONLoader yields the same object as parsing the file directly with json.load"

        # Result produced by the loader under test.
        actual = JSONLoader().load(people_json, {})

        # Reference result parsed straight from the asset with the standard library.
        with open(people_json) as reference_file:
            expected = json.load(reference_file)

        assert expected == actual

0 commit comments

Comments
 (0)