如何使用mapreduce解析json文件?

rekjcdws  于 2021-06-02  发布在  Hadoop
关注(0)|答案(0)|浏览(481)

我是json格式的新手。我正在尝试学习如何解析json文件,并使用mapreduce编程模型提取其中的数据。有没有可以读取跨多行记录的json解析器?以下是json文件中可能出现的全部元素(最大集合):

{ 
    "type": "",
    "format": "",
    "version": "",
    "id": "",
    "start": "",
    "cp": "",
"message": {"proto": "","protoVer": "","cliIP": "","reqPort": "","reqHost": "","reqMethod": "","reqPath": "","reqQuery": "","reqCT": "","reqLen": "","sslVer": "","status": "","redirURL": "","respCT": "","respLen": "","bytes": "","UA": "","fwdHost": ""},

"reqHdr": {"accEnc": "","accLang": "","auth": "","cacheCtl": "","conn": "","contMD5": "","cookie": "","DNT": "","expect": "","ifMatch": "","ifMod": "","ifNone": "","ifRange": "","ifUnmod": "","range": "","referer": "","te": "","upgrade": "","via": "","xFrwdFor": "","xReqWith": ""},

"respHdr": {"accRange": "","allowOrigin": "","age": "","allow": "","cacheCtl": "","conn": "","contEnc": "","contLang": "","contMD5": "","contDisp": "","contRange": "","date": "","eTag": "","expires": "","lastMod": "","link": "","p3p": "","retry": "","server": "","trailer": "","transEnc": "","vary": "","via": "","warning": "","wwwAuth": "","xPwrdBy": "","setCookie": ""},

"netPerf": {"downloadTime": "","originName": "","originIP": "","originInitIP": "","originRetry": "","lastMileRTT": "","midMileLatency": "","netOriginLatency": "","lastMileBW": "","cacheStatus": "","firstByte": "","lastByte": "","asnum": "","network": "","netType": "","edgeIP": ""},

"geo": {"country": "","region": "","city": ""},

"waf" : {"logVer" : "1.0","ipRules" : "","appRules" : "","warn" : "","deny" : ""},

"content": {"custom_name": "custom_value"}

}

这些是json文件中的示例值。

{"type":"cloud_monitor","format":"default","version":"1.0","id":"71101cb85441995d11a43bb","start":"1413585245.921","cp":"254623","message":{"proto":"http","protoVer":"1.1","status":"403","cliIP":"23.79.231.14","reqPort":"80","reqHost":"ksd.metareactor.com","reqMethod":"GET","reqPath":"%2findex.php","reqQuery":"path%3d57%26product_id%3d49%26route%3d%255Cwinnt%255Cwin.ini%2500.","respCT":"text/html","respLen":"286","bytes":"286","UA":"mozilla-saturn","fwdHost":"origin-demo2-akamaized.scoe-sil.net"},
"reqHdr":{"accEnc":"gzip,%20deflate","cookie":"PHPSESSID%3dkkqoodvfe0rt9l7lbvqghk6e15%3bcurrency%3dUSD%3blanguage%3den"},"netPerf":{"downloadTime":"8","lastMileRTT":"20","cacheStatus":"0","firstByte":"1","lastByte":"1","asnum":"12222","edgeIP":"184.28.16.109"},"geo":{"country":"US","region":"CA","city":"SANFRANCISCO","lat":"37.7795","long":"-122.4195"},"network":{"edgeIP":"184.28.16.109","asnum":"12222","network":"","networkType":""},"waf":{"ver":"2.0","policy":"qik1_12418","ruleSet":"KRS%201.0","mode":"scr","rsr":"1","dor":"0","oft":"0","riskGroups":":INBOUND-ANOMALY","riskTuples":":-3000002","riskScores":":-1000","pAction":"","pRate":"","warnRules":"3000002","warnSlrs":"ARGS%3aroute","warnData":"d2lubnQvd2luLmluaQ%3d%3d","warnTags":"AKAMAI%2fWEB_ATTACK%2fFILE_INJECTION","denyRules":"INBOUND-ANOMALY","denyData":"U2NvcmU6IDEwMDAsIERFTlkgdGhyZXNob2xkOiAyNSwgQWxlcnQgUnVsZXM6IDMwMDAwMDIsIERlbnkgUnVsZTogLCBMYXN0IE1hdGNoZWQgTWVzc2FnZTogTG9jYWwgU3lzdGVtIEZpbGUgQWNjZXNzIEF0dGVtcHQ%3d"}}

我有一个Java JSON解析器,但它只能读取单行的记录。如何在json文件中识别跨多行的记录,并在mapreduce代码中用它来提取数据?
我的json解析器类:

// Parse the input value as newline-separated JSON records and copy the
// top-level fields of each record into the surrounding variables.
// NOTE: each physical line is parsed as a complete JSON document, so a
// record that spans several lines cannot be handled by this fragment;
// the whole record must arrive in a single line of `value`.
String[] tuple = value.toString().split("\n");
try {
    for (String line : tuple) {
        // Skip blank lines: new JSONObject("") throws JSONException and
        // would abort processing of every remaining line.
        if (line.trim().isEmpty()) {
            continue;
        }
        JSONObject jsonobj = new JSONObject(line);
        type    = jsonobj.getString("type");
        format  = jsonobj.getString("format");
        version = jsonobj.getString("version");
        id      = jsonobj.getString("id");
        start   = jsonobj.getString("start");
        cp      = jsonobj.getString("cp");
        // "message" is a nested JSON object, not a string; casting it to
        // String throws ClassCastException, which the JSONException
        // handler below does not catch. Keep its serialized JSON text.
        message = jsonobj.getJSONObject("message").toString();
    }
} catch (JSONException e) {
    // Malformed JSON or a missing key: report it and stop parsing.
    e.printStackTrace();
}

有人能帮我写代码来读取mapreduce中多行的json记录吗?

暂无答案!

目前还没有任何答案,快来回答吧!

相关问题