我试过运行hive query of table creation命令。
代码内容如下:
CREATE EXTERNAL TABLE avro_test_1
ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES ('avro.schema.literal'='
{
"type" : "record",
"name" : "pixel_record",
"namespace" : "com.mediamath.log_streaming",
"doc" : "represents a record published into kafka for each pixel event",
"fields" : [ {
"name" : "event_id",
"type" : "string",
"doc" : "unique ID for each pixel event",
"default" : ""
}, {
"name" : "system_timestamps",
"type" : [ "null", {
"type" : "array",
"items" : {
"type" : "record",
"name" : "timestamps_record",
"doc" : "Read and publish timestamp of a system that processed this event",
"fields" : [ {
"name" : "publisher",
"type" : "string",
"doc" : "name of the system publishing this event",
"default" : ""
}, {
"name" : "source_time",
"type" : "long",
"doc" : "unix time in nanos denoting moment when the message was received by the system",
"default" : 0
}, {
"name" : "publish_time",
"type" : "long",
"doc" : "unix time in nanos denoting the moment when the message was published by the system",
"default" : 0
} ]
}
} ],
"doc" : "array of 3-tuples containing message receive time, system name, and system publish time",
"default" : null
}, {
"name" : "errors",
"type" : [ "null", {
"type" : "array",
"items" : {
"type" : "record",
"name" : "Error",
"doc" : "Used to indicate if an error occurred while processing this event. Provides more context about the nature of error",
"fields" : [ {
"name" : "publisher",
"type" : "string",
"doc" : "name of the system that encountered an error while processing this event",
"default" : ""
}, {
"name" : "error_message",
"type" : "string",
"doc" : "brief error message",
"default" : ""
}, {
"name" : "error_code",
"type" : "int",
"doc" : "pre-defined error codes for specific error types",
"default" : 0
} ]
}
} ],
"doc" : "array of errors that occured while processing this event",
"default" : null
}, {
"name" : "event_data",
"type" : {
"type" : "record",
"name" : "pixel_event_data",
"doc" : "represents a pixel event that was processed by MathTag. Refer to https://wiki.mediamath.com/wiki/pages/viewpage.action?pageId=79659035",
"fields" : [ {
"name" : "mt_timestamp",
"type" : "string",
"doc" : "timestamp when this pixel event occurred",
"default" : ""
}, {
"name" : "ip_address",
"type" : "string",
"doc" : "IP address of browser",
"default" : ""
}, {
"name" : "domain_name",
"type" : "string",
"doc" : "MM domain name like pixel.mathtag.com",
"default" : ""
}, {
"name" : "mm_uuid",
"type" : "string",
"doc" : "MediaMath UUID",
"default" : ""
}, {
"name" : "path_query",
"type" : "string",
"default" : ""
}, {
"name" : "user_agent",
"type" : "string",
"default" : ""
}, {
"name" : "referrer",
"type" : "string",
"default" : ""
}, {
"name" : "record_length",
"type" : "int",
"doc" : "string length of entire event string",
"default" : 0
}, {
"name" : "filename",
"type" : "string",
"doc" : "log file name in which this event was recorded in pixel server",
"default" : ""
} ]
},
"doc" : "Mathtag event data"
}, {
"name" : "cid_data",
"type" : [ "null", {
"type" : "record",
"name" : "cid_data",
"doc" : "represents Connected ID information related to the MM UUID in the original event record",
"fields" : [ {
"name" : "one_of_uuid_or_connected_id",
"type" : "string",
"doc" : "One of: Connected ID, as reported by IDM; or the original UUID. Refer to next field to determine which."
}, {
"name" : "is_the_connected_id_real",
"type" : "boolean",
"doc" : "This flag will be set to 'true' if and only if the field 'one_of_uuid_or_connected_id' in this record contains an actual Connected ID value that was reported by IDM."
}, {
"name" : "uuid_cid_connection_type",
"type" : "int",
"doc" : "This flag represents the relationship between the MM_UUID (primary device ID in bid request) and the ConnectedID that is retrieved from Cross-Device ID Graph (IDM). 1 - Deterministic; 0 - Probabilistic; -1 - Unknown.",
"default" : -1
} ]
} ],
"doc" : "Connected ID enrichment",
"default" : null
}, {
"name" : "geo_data",
"type" : [ "null", {
"type" : "record",
"name" : "geo_data",
"namespace" : "com.mediamath.geo",
"doc" : "represents a record in geo meta data obtained from Digital Element",
"fields" : [ {
"name" : "ip_version",
"type" : "int",
"doc" : "version of Digital Element DB used to get geo metadata",
"default" : 0
}, {
"name" : "start_ip",
"type" : "long",
"doc" : "start IP address of the geo IP range to which this pixel events IP address belongs",
"default" : 0
}, {
"name" : "end_ip",
"type" : "long",
"doc" : "end IP address of the geo IP range to which this pixel events IP address belongs",
"default" : 0
}, {
"name" : "country",
"type" : "string",
"doc" : "name of the country in which the user fired the pixel",
"default" : ""
}, {
"name" : "country_code_iso",
"type" : "string",
"doc" : "an internationally recognized two-letter code (alpha-2) used to refer to countries and subdivisions",
"default" : ""
}, {
"name" : "country_id",
"type" : "int",
"doc" : "unique MM ID tied to the Digital Element country field. See Country for description",
"default" : 0
}, {
"name" : "region",
"type" : "string",
"doc" : "name of region in which the user fired the pixel",
"default" : ""
}, {
"name" : "region_id",
"type" : "int",
"doc" : "unique MM ID tied to the Digital Element region field. See Region for description",
"default" : 0
}, {
"name" : "dma",
"type" : "string",
"doc" : "name of designated market area in which the user fired the pixel",
"default" : ""
}, {
"name" : "dma_id",
"type" : "int",
"doc" : "unique MM ID tied to the Digital Element dma field. See Dma for description",
"default" : 0
}, {
"name" : "conn_speed",
"type" : "string",
"doc" : "reflects the speed of a user’s connection. For example, whether the user is connecting via dialup or a cable modem. This field can also include connection types such as satellite or mobile",
"default" : ""
}, {
"name" : "conn_speed_id",
"type" : "int",
"doc" : "unique MM ID tied to the Digital Element connection speed field. See ConnSpeed for description",
"default" : 0
}, {
"name" : "isp",
"type" : "string",
"doc" : "Internet service provider associated with the user who fired the pixel. Examples: AT&T, Verizon, Charter etc",
"default" : ""
}, {
"name" : "isp_id",
"type" : "int",
"doc" : "unique MM ID tied to the Digital Element isp field. See Isp for description.",
"default" : 0
}, {
"name" : "zipcode",
"type" : "string",
"doc" : "zipcode/postal code in which the user fired the pixel",
"default" : ""
}, {
"name" : "zipcode_id",
"type" : "int",
"doc" : "unique MM ID tied to the Digital Element zipcode field. See ZipCode for description.",
"default" : 0
}, {
"name" : "GMT_offset",
"type" : "int",
"doc" : "the current offset from Greenwich Mean Time (GMT/UTC) accounting for Daylight Savings Time, if appropriate",
"default" : 0
}, {
"name" : "in_dst",
"type" : "string",
"doc" : "if country is in daylight savings time or not. Y for yes or N for no",
"default" : ""
}, {
"name" : "homebiz_type_id",
"type" : "int",
"doc" : "connection type of the user who fired the pixel, either 1 for ‘business’ or 2 for ‘home’",
"default" : -1
}, {
"name" : "homebiz_type",
"type" : "string",
"doc" : "connection type of the user who fired the pixel, ‘business’ or ‘home’",
"default" : "?"
}, {
"name" : "region_code",
"type" : "int",
"doc" : "a numerical id provided by digital element that corresponds to the region name",
"default" : 0
}, {
"name" : "city",
"type" : "string",
"doc" : "concatination of country abbr + region abbr + city name",
"default" : ""
}, {
"name" : "city_code",
"type" : "int",
"doc" : "a numerical id provided by digital element that corresponds to the city name",
"default" : 0
}, {
"name" : "city_code_id",
"type" : "int",
"doc" : "unique MM ID tied to the Digital Element city code field. See CityCode for description",
"default" : 0
} ]
} ],
"doc" : "Geo data enrichment",
"default" : null
}, {
"name" : "misc_data",
"type" : [ "null", {
"type" : "record",
"name" : "misc_data",
"doc" : "11th field of MathTag log line",
"fields" : [ {
"name" : "last",
"type" : [ "null", {
"type" : "record",
"name" : "last",
"fields" : [ {
"name" : "count",
"type" : "long",
"doc" : "total number of records"
} ]
} ],
"doc" : "indicates last record in MathTag log file",
"default" : null
} ]
} ],
"doc" : "Misc data enrichment",
"default" : null
}, {
"name" : "standard_segments",
"type" : [ "null", {
"type" : "record",
"name" : "standard_segments",
"doc" : "metadata record providing insights into Standard Segments enrichment",
"fields" : [ {
"name" : "enriched",
"type" : "boolean",
"doc" : "indicates that this is a parent record from which additional SS records were generated"
} ]
} ],
"doc" : "Standard Segments",
"default" : null
} ]
}
')
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 's3://raw-logs-data-prod-superior-mm/pixels/avro/'
;
我验证了这个json格式,它是一个有效的格式。我得到以下错误:
语法错误:org.apache.hadoop.hive.ql.parse.parseexception:行5:0表属性中的外部输入“{”在“type”附近需要stringliteral
暂无答案!
目前还没有任何答案,快来回答吧!