Python加载json数据

时间:2022-09-28 cool whidpers 人气:0

前言

最近在python里面用json读取json文件，可是老是不成功，特此记录一下。

预备知识：

# Signature excerpt of json.load from the standard library; the implementation
# is omitted here and only the docstring summary is shown.
def load(fp, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    a JSON document) to a Python object."""


# Signature excerpt of json.loads from the standard library; the implementation
# is omitted here and only the docstring summary is shown.
# NOTE: the ``encoding`` keyword shown below belongs to older Python 3 releases;
# it was removed in Python 3.9, so do not pass it on current versions.
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    """Deserialize ``s`` (a ``str`` instance containing a JSON
    document) to a Python object."""

其实我刚刚看json.load()和json.loads()代码定义的时候，也不知道什么是文件类型、什么是字符串类型，其实用python的type函数看一下就清楚了。

例如：

# json.load() takes the open file object itself.
with open("文件名") as f:
    print(type(f))  # <class '_io.TextIOWrapper'>, i.e. a text I/O object
    result = json.load(f)

# json.loads() takes a str, here the first line read from the file.
with open("文件名") as f:
    line = f.readline()
    print(type(line))  # <class 'str'>
    result = json.loads(line)

使用方法

从以上可以看出json.load()是用来读取文件的，即，将文件打开然后就可以直接读取。示例如下：

# Parse the whole file in one call by handing the file object to json.load().
with open("文件名") as f:
    result = json.load(f)

json.loads()是用来读取字符串的，即，可以把文件打开，用readline()读取一行，然后json.loads()一行。示例如下：

#json文件为：
{"outputs": ["pool1/7x7/ets", "pool1/7x7/rf", "pool1/10x10/ets", "pool1/10x10/rf", "pool1/13x13/ets", "pool1/13x13/rf"]}

读取代码如下：

# Works only because the whole JSON document sits on a single line.
with open("文件名") as f:
    line = f.readline()
    result = json.loads(line)

当json文件如下（一个json对象跨越多行）时，用上面按行读取的方式就会出错：

{
"dataset":{
    "train": {"type": "mnist", "data_set": "train", "layout_x": "tensor"},
    "test": {"type": "mnist", "data_set": "test", "layout_x": "tensor"}
},
"train":{
    "keep_model_in_mem":0,
    "random_state":0,
    "data_cache":{
        "cache_in_disk":{
            "default":1
        },
        "keep_in_mem":{
            "default":0
        },
        "cache_dir":"/mnt/raid/fengji/gcforest/mnist/fg-tree500-depth100-3folds/datas"
    }
},
"net":{
"outputs": ["pool1/7x7/ets", "pool1/7x7/rf", "pool1/10x10/ets", "pool1/10x10/rf", "pool1/13x13/ets", "pool1/13x13/rf"],
"layers":[
    {
        "type":"FGWinLayer",
        "name":"win1/7x7",
        "bottoms": ["X","y"],
        "tops":["win1/7x7/ets", "win1/7x7/rf"],
        "n_classes": 124,
        "estimators": [
            {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10},
            {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}
        ],
        "stride_x": 2,
        "stride_y": 2,
        "win_x":7,
        "win_y":7
    },
    {
        "type":"FGWinLayer",
        "name":"win1/10x10",
        "bottoms": ["X","y"],
        "tops":["win1/10x10/ets", "win1/10x10/rf"],
        "n_classes": 10,
        "estimators": [
            {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10},
            {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}
        ],
        "stride_x": 2,
        "stride_y": 2,
        "win_x":10,
        "win_y":10
    },
    {
        "type":"FGWinLayer",
        "name":"win1/13x13",
        "bottoms": ["X","y"],
        "tops":["win1/13x13/ets", "win1/13x13/rf"],
        "n_classes": 10,
        "estimators": [
            {"n_folds":3,"type":"ExtraTreesClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10},
            {"n_folds":3,"type":"RandomForestClassifier","n_estimators":500,"max_depth":100,"n_jobs":-1,"min_samples_leaf":10}
        ],
        "stride_x": 2,
        "stride_y": 2,
        "win_x":13,
        "win_y":13
    },
    {
        "type":"FGPoolLayer",
        "name":"pool1",
        "bottoms": ["win1/7x7/ets", "win1/7x7/rf", "win1/10x10/ets", "win1/10x10/rf", "win1/13x13/ets", "win1/13x13/rf"],
        "tops": ["pool1/7x7/ets", "pool1/7x7/rf", "pool1/10x10/ets", "pool1/10x10/rf", "pool1/13x13/ets", "pool1/13x13/rf"],
        "pool_method": "avg",
        "win_x":2,
        "win_y":2
    }
]

}
}

因为在代码中，json.loads()并没有读取完整的json文件，只是读取了一行，所以这时json.loads(line)读取的是不符合json语法的字符串，会报错：

with open("文件名") as f:
    # readline() returns only the first line of the file, not the whole
    # document, so `line` holds an incomplete JSON fragment and the
    # json.loads() call below raises json.decoder.JSONDecodeError.
    line = f.readline()
    result = json.loads(line)

 Traceback (most recent call last):
  File "D:/PycharmProjects/mnistCheck/test.py", line 12, in <module>
    result = json.loads(row)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\json\__init__.py", line 319, in loads
    return _default_decoder.decode(s)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\json\decoder.py", line 339, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\json\decoder.py", line 355, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 2 column 1 (char 2)

那么问题来了……在实际应用中，我们有时会在json文件中写注释，比如以“//”开头的整行注释（标准json本身并不支持注释）。除了注释部分外，其他内容都是符合json语法的，那么我们要怎么处理呢？

    def load_json(path):
        """Load a JSON file while ignoring whole-line ``//`` comments.

        Standard JSON has no comment syntax, so every line whose first
        non-whitespace characters are ``//`` is dropped before the remaining
        text is handed to ``json.loads``. Trailing comments that follow JSON
        content on the same line are NOT removed.

        Args:
            path: Path of the JSON file to read.

        Returns:
            The Python object decoded from the comment-free JSON text.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the remaining text is not valid JSON.
        """
        import json

        # JSON text is UTF-8 by specification, so do not rely on the platform
        # default encoding.
        with open(path, encoding="utf-8") as f:
            # Iterate the file lazily and keep only the non-comment lines.
            kept = [row for row in f if not row.strip().startswith("//")]
        # Each kept row still carries its own line ending, so join with the
        # empty string to avoid doubling every newline.
        return json.loads("".join(kept))

总结

加载全部内容