Spark 作业和 Hive 脚本自动化

x33g5p2x  于 2021-06-28  发布在  Hive
关注(0)|答案(1)|浏览(398)

我想依次运行 Spark 作业和 Hive SQL 脚本。目前我是用 shell 脚本实现的。有没有其他推荐的方法或最佳实践来实现这一点?我在网上搜索过,Oozie 适合这个场景吗?

u4vypkhs

u4vypkhs1#

是的,您可以定义一个 Oozie 工作流,先执行 Spark 操作(action),然后再执行 HiveQL 操作。例如,可以使用如下的工作流定义。

<!--
  Oozie workflow template that runs a Spark job and then a Hive script, in that order.

  Control flow:
    start -> firstSparkJob -> HiveAction -> end
    firstSparkJob fails -> ErrorSpark (kill)
    HiveAction fails    -> ErrorHive  (kill)

  Every value in [SQUARE BRACKETS] is a placeholder to replace before deploying.
  Comments that say "more ... as needed" mark places where the preceding element
  may be repeated or where further optional elements may be added.
-->
<workflow-app name="your_sequence" xmlns="uri:oozie:workflow:0.1">
    <!-- optional workflow-level settings may go here -->
    <start to="firstSparkJob"/>

    <!-- Step 1: the Spark job. On success control moves on to the Hive step. -->
    <action name="firstSparkJob">
        <spark xmlns="uri:oozie:spark-action:0.1">
            <job-tracker>[JOB-TRACKER]</job-tracker>
            <name-node>[NAME-NODE]</name-node>
            <!-- Paths to delete / create before the job is launched. -->
            <prepare>
                <delete path="[PATH]"/>
                <!-- more <delete> entries as needed -->
                <mkdir path="[PATH]"/>
                <!-- more <mkdir> entries as needed -->
            </prepare>
            <job-xml>[SPARK SETTINGS FILE]</job-xml>
            <configuration>
                <property>
                    <name>[PROPERTY-NAME]</name>
                    <value>[PROPERTY-VALUE]</value>
                </property>
                <!-- more <property> entries as needed -->
            </configuration>
            <master>[SPARK MASTER URL]</master>
            <mode>[SPARK MODE]</mode>
            <name>[SPARK JOB NAME]</name>
            <class>[SPARK MAIN CLASS]</class>
            <jar>[SPARK DEPENDENCIES JAR / PYTHON FILE]</jar>
            <spark-opts>[SPARK-OPTIONS]</spark-opts>
            <!-- Arguments handed to the Spark application, in order. -->
            <arg>[ARG-VALUE]</arg>
            <!-- more <arg> entries as needed -->
            <arg>[ARG-VALUE]</arg>
            <!-- further optional spark-action elements as needed -->
        </spark>
        <ok to="HiveAction"/>
        <error to="ErrorSpark"/>
    </action>

    <!-- Step 2: the Hive script. Only reached when the Spark job succeeded. -->
    <action name="HiveAction">
        <hive xmlns="uri:oozie:hive-action:0.2">
            <job-tracker>[JOB-TRACKER]</job-tracker>
            <name-node>[NAME-NODE]</name-node>
            <!-- Paths to delete / create before the script is launched. -->
            <prepare>
                <delete path="[PATH]"/>
                <!-- more <delete> entries as needed -->
                <mkdir path="[PATH]"/>
                <!-- more <mkdir> entries as needed -->
            </prepare>
            <job-xml>[HIVE SETTINGS FILE]</job-xml>
            <configuration>
                <property>
                    <name>[PROPERTY-NAME]</name>
                    <value>[PROPERTY-VALUE]</value>
                </property>
                <!-- more <property> entries as needed -->
            </configuration>
            <script>[HIVE-SCRIPT]</script>
            <!-- Parameters passed to the Hive script. -->
            <param>[PARAM-VALUE]</param>
            <!-- more <param> entries as needed -->
            <param>[PARAM-VALUE]</param>
            <file>[FILE-PATH]</file>
            <!-- more <file> entries as needed -->
            <archive>[FILE-PATH]</archive>
            <!-- more <archive> entries as needed -->
        </hive>
        <ok to="end"/>
        <error to="ErrorHive"/>
    </action>

    <!--
      Failure targets for the two <error to="..."/> transitions above.
      Each one terminates the workflow and records the failing node's error message.
    -->
    <kill name="ErrorSpark">
        <message>Spark action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <kill name="ErrorHive">
        <message>Hive action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- additional workflow nodes may go here -->
    <end name="end"/>
</workflow-app>

相关问题