在配置 Dify 工作流时,有一种常见场景是需要解析用户上传的文件内容。比如在向用户询问必要的关键信息时,用户可以直接上传图片或者 word、pdf 等文件,省去手动输入的步骤。为此我们可以抽象出一个通用的工作流:当接收到的是文件时,先提取其中的文字并放入对话上下文,模拟用户直接输入了文字,然后再交给大模型做后续处理,这样准确率很高。

思路

本文只分享思路,你可以根据自己的场景进行扩展更多的方式。

如果是图片类型,可以直接使用支持多模态的大模型提取内容;如果你的大模型不支持多模态,也可以对接第三方 OCR 服务进行图片识别,效果是一样的。

如果是其他类型,比如 word、pdf、markdown 等文件,那么我们可以直接使用 Dify 自带的节点组件(文档提取器)进行提取。

如果还有音频类型,可以使用 ASR(语音识别)将其转成文字。下面给出一个 DSL,导入即可使用。

Dify 文件通用提取文字工作流分享

基于 Dify 1.7.1 版本的 DSL,导入后选择支持视觉的模型即可使用,工作流会输出从文件中提取到的全部文字内容。

# App-level metadata of this DSL export; `mode: workflow` marks it as a
# workflow-type app.
app:
  description: 'http://www.51it.wang 出品'
  icon: 🤖
  icon_background: '#FFEAD5'
  mode: workflow
  name: 文件 ocr 识别
  use_icon_as_answer_icon: false
# Marketplace plugins this DSL references. The pinned OpenAI plugin corresponds
# to the `langgenius/openai/openai` provider used by the LLM node below.
dependencies:
- current_identifier: null
  type: marketplace
  value:
    marketplace_plugin_unique_identifier: langgenius/openai:0.2.3@5a7f82fa86e28332ad51941d0b491c1e8a38ead539656442f7bf4c6129cd15fa
kind: app
# NOTE(review): presumably the DSL schema version, not the Dify release
# (the article mentions Dify 1.7.1) — confirm.
version: 0.3.1
workflow:
  # No conversation or environment variables are defined for this workflow.
  conversation_variables: []
  environment_variables: []
  features:
    # App-level file upload is switched off (`enabled: false`). Files enter
    # this workflow through the start node's `file` variable instead.
    # NOTE(review): the remaining settings in this section are presumably
    # inactive while `enabled` is false — confirm.
    file_upload:
      allowed_file_extensions:
      - .JPG
      - .JPEG
      - .PNG
      - .GIF
      - .WEBP
      - .SVG
      allowed_file_types:
      - image
      allowed_file_upload_methods:
      - local_file
      - remote_url
      enabled: false
      # NOTE(review): size limits are presumably in MB — confirm.
      fileUploadConfig:
        audio_file_size_limit: 50
        batch_count_limit: 5
        file_size_limit: 15
        image_file_size_limit: 10
        video_file_size_limit: 100
        workflow_file_upload_limit: 10
      image:
        enabled: false
        number_limits: 3
        transfer_methods:
        - local_file
        - remote_url
      number_limits: 3
    opening_statement: ''
    retriever_resource:
      enabled: true
    # The remaining optional features (moderation, speech, suggestions) are
    # all disabled.
    sensitive_word_avoidance:
      enabled: false
    speech_to_text:
      enabled: false
    suggested_questions: []
    suggested_questions_after_answer:
      enabled: false
    text_to_speech:
      enabled: false
      language: ''
      voice: ''
  graph:
    # Edges wire the nodes together. Each edge `id` follows the pattern
    # <source>-<sourceHandle>-<target>-<targetHandle>.
    edges:
    # Document extractor -> end node (output of the document branch).
    - data:
        isInLoop: false
        sourceType: document-extractor
        targetType: end
      id: 1760947522841-source-1754657363849-target
      source: '1760947522841'
      sourceHandle: source
      target: '1754657363849'
      targetHandle: target
      type: custom
      zIndex: 0
    # If-else `true` case -> LLM node (image branch).
    - data:
        isInIteration: false
        isInLoop: false
        sourceType: if-else
        targetType: llm
      id: 1760947763558-true-1760947801211-target
      source: '1760947763558'
      sourceHandle: 'true'
      target: '1760947801211'
      targetHandle: target
      type: custom
      zIndex: 0
    # LLM node -> second end node (output of the image branch).
    - data:
        isInIteration: false
        isInLoop: false
        sourceType: llm
        targetType: end
      id: 1760947801211-source-1760947834434-target
      source: '1760947801211'
      sourceHandle: source
      target: '1760947834434'
      targetHandle: target
      type: custom
      zIndex: 0
    # If-else `false` handle -> document extractor (non-image branch).
    - data:
        isInLoop: false
        sourceType: if-else
        targetType: document-extractor
      id: 1760947763558-false-1760947522841-target
      source: '1760947763558'
      sourceHandle: 'false'
      target: '1760947522841'
      targetHandle: target
      type: custom
      zIndex: 0
    # Start node -> if-else node (entry into the routing decision).
    - data:
        isInLoop: false
        sourceType: start
        targetType: if-else
      id: 1754657032768-source-1760947763558-target
      source: '1754657032768'
      sourceHandle: source
      target: '1760947763558'
      targetHandle: target
      type: custom
      zIndex: 0
    nodes:
    # Start node: declares the single required input `file`, a file list
    # (`file-list`) that accepts both image and document uploads.
    - data:
        desc: ''
        selected: false
        title: 开始
        type: start
        variables:
        - allowed_file_extensions: []
          allowed_file_types:
          - image
          - document
          allowed_file_upload_methods:
          - local_file
          - remote_url
          hide: false
          label: file
          # NOTE(review): for a file-list variable this is presumably the
          # maximum number of files — confirm.
          max_length: 5
          options: []
          required: true
          type: file-list
          variable: file
      height: 90
      id: '1754657032768'
      # Canvas layout only; no effect on execution.
      position:
        x: 30
        y: 275
      positionAbsolute:
        x: 30
        y: 275
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # End node of the document branch: exposes the document extractor's `text`
    # output as `ocr_result`.
    - data:
        desc: ''
        outputs:
        - value_selector:
          - '1760947522841'
          - text
          # The extractor below is fed a file list, so its `text` output is a
          # list of strings (one entry per file), not a single string.
          value_type: array[string]
          variable: ocr_result
        selected: false
        title: 结束
        type: end
      height: 90
      id: '1754657363849'
      # Canvas layout only; no effect on execution.
      position:
        x: 1165.9601844704073
        y: 390.25315467340494
      positionAbsolute:
        x: 1165.9601844704073
        y: 390.25315467340494
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # Document extractor: reads the start node's `file` variable and extracts
    # the text of the uploaded documents.
    - data:
        desc: ''
        # The selected variable is the start node's `file`, declared as
        # `file-list` (array[file]), so the array flag must be true to match.
        is_array_file: true
        selected: false
        title: 文档提取器
        type: document-extractor
        variable_selector:
        - '1754657032768'
        - file
      height: 94
      id: '1760947522841'
      # Canvas layout only; no effect on execution.
      position:
        x: 827.5799294442232
        y: 390.25315467340494
      positionAbsolute:
        x: 827.5799294442232
        y: 390.25315467340494
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # If-else node: routes to the `true` handle when the uploaded file list
    # contains an item whose `type` is in [image]; otherwise the `false`
    # handle is taken (see the edges: true -> LLM, false -> document extractor).
    # NOTE(review): with a mixed upload (images + documents) the `contains`
    # check appears to send the whole list down the LLM branch, so documents
    # would not pass through the document extractor — confirm.
    - data:
        cases:
        - case_id: 'true'
          conditions:
          - comparison_operator: contains
            id: de61f5d1-c57c-42b2-b961-c0826e25a2e5
            # Per-file sub-condition evaluated against each item's `type`.
            sub_variable_condition:
              case_id: 30c4d43b-2bcc-44a7-9361-c235989ecfac
              conditions:
              - comparison_operator: in
                id: 80bb51fb-ab74-45d8-8098-1384a7539065
                key: type
                value:
                - image
                varType: string
              logical_operator: and
            value: ''
            varType: array[file]
            variable_selector:
            - '1754657032768'
            - file
          id: 'true'
          logical_operator: and
        desc: ''
        selected: false
        title: 条件分支
        type: if-else
      height: 150
      id: '1760947763558'
      # Canvas layout only; no effect on execution.
      position:
        x: 364
        y: 275
      positionAbsolute:
        x: 364
        y: 275
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # LLM node (image branch): a vision-enabled chat model that receives the
    # uploaded files as vision input and is prompted to extract their text.
    - data:
        context:
          enabled: false
          variable_selector: []
        desc: ''
        model:
          completion_params:
            frequency_penalty: 0.3
            presence_penalty: 0.2
            temperature: 0.5
            top_p: 0.85
          mode: chat
          # The article says to choose a vision-capable model after importing;
          # replace this name/provider if a different model is used.
          name: gpt-4.1-mini-2025-04-14
          provider: langgenius/openai/openai
        prompt_template:
        # System prompt: describes the extraction task and the output format.
        # NOTE(review): the `{{ 占位 }}` markers are presumably literal
        # placeholders for the model, not workflow variables — confirm.
        - id: fcc4af5a-178e-4b6b-84d7-d2c603fa4f8e
          role: system
          text: '根据用户上传的文件进行文字内容提取,然后通过自然语言或排版等优化提取内容,最后返回所有信息。

            返回的内容格式为:

            文件名为:{{ 占位 }},ocr识别结果为:{{占位}}'
        # User prompt: references the start node's `file` variable inline.
        - id: 27600332-e624-4d0e-b28a-0cf8631cf6ab
          role: user
          text: 提取图片{{#1754657032768.file#}}文字内容
        selected: false
        title: LLM
        type: llm
        variables: []
        # Vision input: the start node's `file` variable, at `high` detail.
        vision:
          configs:
            detail: high
            variable_selector:
            - '1754657032768'
            - file
          enabled: true
      height: 90
      id: '1760947801211'
      # Canvas layout only; no effect on execution.
      position:
        x: 851.8571428571428
        y: 190.49999999999997
      positionAbsolute:
        x: 851.8571428571428
        y: 190.49999999999997
      selected: true
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # End node of the image branch: exposes the LLM node's `text` output under
    # the same output name (`ocr_result`) as the document branch.
    - data:
        desc: ''
        outputs:
        - value_selector:
          - '1760947801211'
          - text
          value_type: string
          variable: ocr_result
        selected: false
        title: 结束 2
        type: end
      height: 90
      id: '1760947834434'
      # Canvas layout only; no effect on execution.
      position:
        x: 1189.029421515851
        y: 190.49999999999997
      positionAbsolute:
        x: 1189.029421515851
        y: 190.49999999999997
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # Saved editor pan/zoom state of the canvas.
    viewport:
      x: -301.2920975051561
      y: 77.23611359625488
      zoom: 1.1306322140286418

升级版:通过并行迭代逐个处理文件,支持同时上传图片和文档,并将提取结果聚合输出。

# App-level metadata of the second (multi-file) DSL export; `mode: workflow`
# marks it as a workflow-type app.
app:
  description: ''
  icon: 🤖
  icon_background: '#FFEAD5'
  mode: workflow
  name: 文件 ocr 识别多文件混合识别,www.51it.wang博客出品
  use_icon_as_answer_icon: false
# Marketplace plugins this DSL references. The pinned OpenAI plugin corresponds
# to the `langgenius/openai/openai` provider used by the LLM node below.
dependencies:
- current_identifier: null
  type: marketplace
  value:
    marketplace_plugin_unique_identifier: langgenius/openai:0.2.3@5a7f82fa86e28332ad51941d0b491c1e8a38ead539656442f7bf4c6129cd15fa
kind: app
# NOTE(review): presumably the DSL schema version, not the Dify release — confirm.
version: 0.3.1
workflow:
  # No conversation or environment variables are defined for this workflow.
  conversation_variables: []
  environment_variables: []
  features:
    # NOTE(review): app-level file upload is enabled here but lists only image
    # types, while the start node's `file` variable accepts image and
    # document — confirm whether this feature-level setting is intended.
    file_upload:
      allowed_file_extensions:
      - .JPG
      - .JPEG
      - .PNG
      - .GIF
      - .WEBP
      - .SVG
      allowed_file_types:
      - image
      allowed_file_upload_methods:
      - remote_url
      - local_file
      enabled: true
      # NOTE(review): size limits are presumably in MB — confirm.
      fileUploadConfig:
        audio_file_size_limit: 50
        batch_count_limit: 5
        file_size_limit: 15
        image_file_size_limit: 10
        video_file_size_limit: 100
        workflow_file_upload_limit: 10
      image:
        enabled: false
        number_limits: 3
        transfer_methods:
        - local_file
        - remote_url
      number_limits: 3
    opening_statement: ''
    retriever_resource:
      enabled: true
    # The remaining optional features (moderation, speech, suggestions) are
    # all disabled.
    sensitive_word_avoidance:
      enabled: false
    speech_to_text:
      enabled: false
    suggested_questions: []
    suggested_questions_after_answer:
      enabled: false
    text_to_speech:
      enabled: false
      language: ''
      voice: ''
  graph:
    # Edges wire the nodes together. Edges with `isInIteration: true` live
    # inside the iteration node identified by `iteration_id`.
    edges:
    # Start node -> iteration node (the uploaded file list is iterated).
    - data:
        isInIteration: false
        isInLoop: false
        sourceType: start
        targetType: iteration
      id: 1754657032768-source-1761103378304-target
      source: '1754657032768'
      sourceHandle: source
      target: '1761103378304'
      targetHandle: target
      type: custom
      zIndex: 0
    # Iteration start -> if-else node (per-item routing decision).
    - data:
        isInIteration: true
        isInLoop: false
        iteration_id: '1761103378304'
        sourceType: iteration-start
        targetType: if-else
      id: 1761103378304start-source-1761103413723-target
      source: 1761103378304start
      sourceHandle: source
      target: '1761103413723'
      targetHandle: target
      type: custom
      zIndex: 1002
    # If-else `true` case -> LLM node (image items).
    - data:
        isInIteration: true
        isInLoop: false
        iteration_id: '1761103378304'
        sourceType: if-else
        targetType: llm
      id: 1761103413723-true-1761103710965-target
      source: '1761103413723'
      sourceHandle: 'true'
      target: '1761103710965'
      targetHandle: target
      type: custom
      zIndex: 1002
    # If-else `false` handle -> document extractor (non-image items).
    - data:
        isInIteration: true
        isInLoop: false
        iteration_id: '1761103378304'
        sourceType: if-else
        targetType: document-extractor
      id: 1761103413723-false-1761103714182-target
      source: '1761103413723'
      sourceHandle: 'false'
      target: '1761103714182'
      targetHandle: target
      type: custom
      zIndex: 1002
    # Iteration node -> end node (collected results leave the workflow).
    - data:
        isInIteration: false
        isInLoop: false
        sourceType: iteration
        targetType: end
      id: 1761103378304-source-1761104240788-target
      source: '1761103378304'
      sourceHandle: source
      target: '1761104240788'
      targetHandle: target
      type: custom
      zIndex: 0
    # LLM node -> variable aggregator (image branch result).
    - data:
        isInIteration: true
        isInLoop: false
        iteration_id: '1761103378304'
        sourceType: llm
        targetType: variable-aggregator
      id: 1761103710965-source-1761105961859-target
      source: '1761103710965'
      sourceHandle: source
      target: '1761105961859'
      targetHandle: target
      type: custom
      zIndex: 1002
    # Document extractor -> variable aggregator (document branch result).
    - data:
        isInIteration: true
        isInLoop: false
        iteration_id: '1761103378304'
        sourceType: document-extractor
        targetType: variable-aggregator
      id: 1761103714182-source-1761105961859-target
      source: '1761103714182'
      sourceHandle: source
      target: '1761105961859'
      targetHandle: target
      type: custom
      zIndex: 1002
    nodes:
    # Start node: declares the single required input `file`, a file list
    # (`file-list`) that accepts both image and document uploads.
    - data:
        desc: ''
        selected: false
        title: 开始
        type: start
        variables:
        - allowed_file_extensions: []
          allowed_file_types:
          - image
          - document
          allowed_file_upload_methods:
          - local_file
          - remote_url
          hide: false
          label: file
          # NOTE(review): for a file-list variable this is presumably the
          # maximum number of files — confirm.
          max_length: 5
          options: []
          required: true
          type: file-list
          variable: file
      height: 90
      id: '1754657032768'
      # Canvas layout only; no effect on execution.
      position:
        x: 30
        y: 408.5
      positionAbsolute:
        x: 30
        y: 408.5
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # Iteration node: walks the start node's `file` list (array[file]) and
    # collects the variable aggregator's `output` for each item into an
    # array[string].
    - data:
        desc: ''
        # NOTE(review): `terminated` presumably aborts the whole iteration
        # when one item fails — confirm against the iteration node docs.
        error_handle_mode: terminated
        height: 417
        # Parallel mode is on, with `parallel_nums: 2`.
        # NOTE(review): presumably at most two items are processed at once — confirm.
        is_parallel: true
        iterator_input_type: array[file]
        iterator_selector:
        - '1754657032768'
        - file
        # Per-item result: the `output` of the variable aggregator node.
        output_selector:
        - '1761105961859'
        - output
        output_type: array[string]
        parallel_nums: 2
        selected: false
        start_node_id: 1761103378304start
        title: 迭代
        type: iteration
        width: 1133
      height: 417
      id: '1761103378304'
      # Canvas layout only; no effect on execution.
      position:
        x: 334
        y: 408.5
      positionAbsolute:
        x: 334
        y: 408.5
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 1133
      zIndex: 1
    # Iteration-start node: the entry point inside the iteration container
    # (referenced by the iteration node's `start_node_id`); not draggable or
    # selectable on the canvas.
    - data:
        desc: ''
        isInIteration: true
        selected: false
        title: ''
        type: iteration-start
      draggable: false
      height: 48
      id: 1761103378304start
      parentId: '1761103378304'
      # `position` is relative to the parent iteration node (334 + 60 = 394,
      # 408.5 + 104 = 512.5); `positionAbsolute` is the canvas position.
      position:
        x: 60
        y: 104
      positionAbsolute:
        x: 394
        y: 512.5
      selectable: false
      sourcePosition: right
      targetPosition: left
      type: custom-iteration-start
      width: 44
      zIndex: 1002
    # If-else node (inside the iteration): checks the current item's `type`.
    # Items whose type is in [image] take the `true` handle (-> LLM); all
    # others take the `false` handle (-> document extractor).
    - data:
        cases:
        - case_id: 'true'
          conditions:
          - comparison_operator: in
            id: 2cd65b49-4785-4f30-8a69-2b72184d3b5f
            value:
            - image
            varType: string
            # Selects the `type` attribute of the iteration's current `item`.
            variable_selector:
            - '1761103378304'
            - item
            - type
          id: 'true'
          logical_operator: and
        desc: ''
        isInIteration: true
        isInLoop: false
        iteration_id: '1761103378304'
        selected: false
        title: 条件分支 2
        type: if-else
      height: 126
      id: '1761103413723'
      parentId: '1761103378304'
      # `position` is relative to the parent iteration node;
      # `positionAbsolute` is the canvas position.
      position:
        x: 204
        y: 65
      positionAbsolute:
        x: 538
        y: 473.5
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
      zIndex: 1002
    # LLM node (image branch, inside the iteration): a vision-enabled chat
    # model that receives the current item as vision input and is prompted to
    # extract its text.
    - data:
        context:
          enabled: false
          variable_selector: []
        desc: ''
        isInIteration: true
        isInLoop: false
        iteration_id: '1761103378304'
        model:
          completion_params:
            temperature: 0.7
          mode: chat
          # The article says to choose a vision-capable model after importing;
          # replace this name/provider if a different model is used.
          name: gpt-4.1-mini-2025-04-14
          provider: langgenius/openai/openai
        prompt_template:
        # System prompt: describes the extraction task and the output format.
        # NOTE(review): the `{{ 占位 }}` markers are presumably literal
        # placeholders for the model, not workflow variables — confirm.
        - id: 5524fcc1-3240-4f0c-8316-530b99334965
          role: system
          text: '根据用户上传的文件进行文字内容提取,然后通过自然语言或排版等优化提取内容,最后返回所有信息。

            返回的内容格式为:

            文件名为:{{ 占位 }},ocr识别结果为:{{占位}}'
        # User prompt: references the iteration's current `item` inline.
        - id: 36d169c5-1760-46fb-a20f-794dd262f455
          role: user
          text: 提取图片{{#1761103378304.item#}}文字内容
        selected: false
        title: 图片识别
        type: llm
        variables: []
        # Vision input: the iteration's current `item`, at `high` detail.
        vision:
          configs:
            detail: high
            variable_selector:
            - '1761103378304'
            - item
          enabled: true
      height: 90
      id: '1761103710965'
      parentId: '1761103378304'
      # `position` is relative to the parent iteration node;
      # `positionAbsolute` is the canvas position.
      position:
        x: 508
        y: 83
      positionAbsolute:
        x: 842
        y: 491.5
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
      zIndex: 1002
    # Document extractor (document branch, inside the iteration): reads the
    # iteration's current `item`. `is_array_file: false` is consistent with the
    # selector pointing at one item of the iterated file list.
    - data:
        desc: ''
        isInIteration: true
        isInLoop: false
        is_array_file: false
        iteration_id: '1761103378304'
        selected: false
        title: 文档提取器 3
        type: document-extractor
        variable_selector:
        - '1761103378304'
        - item
      height: 94
      id: '1761103714182'
      parentId: '1761103378304'
      # `position` is relative to the parent iteration node;
      # `positionAbsolute` is the canvas position.
      position:
        x: 508
        y: 213
      positionAbsolute:
        x: 842
        y: 621.5
      selected: true
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
      zIndex: 1002
    # End node: exposes the iteration node's collected `output`
    # (array[string], matching the iteration's `output_type`) as `ocr_result`.
    - data:
        desc: ''
        outputs:
        - value_selector:
          - '1761103378304'
          - output
          value_type: array[string]
          variable: ocr_result
        selected: false
        title: 结束 3
        type: end
      height: 90
      id: '1761104240788'
      # Canvas layout only; no effect on execution.
      position:
        x: 1527
        y: 408.5
      positionAbsolute:
        x: 1527
        y: 408.5
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # Variable aggregator (inside the iteration): merges the `text` output of
    # the LLM node and of the document extractor into one string `output`,
    # which the iteration node collects per item.
    # NOTE(review): only one branch runs per item, so the aggregator
    # presumably yields whichever branch produced a value — confirm.
    - data:
        desc: ''
        isInIteration: true
        isInLoop: false
        iteration_id: '1761103378304'
        output_type: string
        selected: false
        title: 变量聚合器
        type: variable-aggregator
        variables:
        - - '1761103710965'
          - text
        - - '1761103714182'
          - text
      height: 136
      id: '1761105961859'
      parentId: '1761103378304'
      # `position` is relative to the parent iteration node;
      # `positionAbsolute` is the canvas position.
      position:
        x: 812
        y: 60
      positionAbsolute:
        x: 1146
        y: 468.5
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
      zIndex: 1002
    # Saved editor pan/zoom state of the canvas.
    viewport:
      x: 64.70000000000016
      y: 11.050000000000011
      zoom: 0.7
文章目录