在配置 dify 工作流的时候,有一种场景是需要解析用户上传的文件内容。比如在询问用户必要的关键信息时,用户可以直接上传图片或者 word、pdf 等文件,省去手动输入的步骤。针对这种场景,我们可以抽象出一个通用的工作流:当接收到的是文件类型时,先提取其中的文字并放入对话上下文,模拟用户输入的是文字,然后再交给大模型做后续操作,准确率很高。

思路

本文只分享思路,你可以根据自己的场景进行扩展更多的方式。

如果是图片类型,那么可以直接使用大模型提取内容;如果你的大模型不支持多模态,也可以对接第三方 OCR 服务进行图片识别,效果是一样的。

如果是其他类型,比如 word、pdf、markdown 等文件,那么我们可以直接使用 dify 自带的节点组件(文档提取器)。

如果还有音频类型,那么可以使用 ASR 将其转成文字(下面的 DSL 只处理图片和文档,音频需要自行扩展)。下面给出一个 DSL,导入即可使用。

Dify 文件通用提取文字工作流分享

基于 dify 1.7.1 版本的 DSL,导入后选择支持视觉的模型即可使用,提取后会输出文件里的所有文字内容。

# App-level metadata of the exported Dify DSL.
app:
  description: 'http://www.51it.wang 出品'
  icon: 🤖
  icon_background: '#FFEAD5'
  # The graph under `workflow.graph` is run as a workflow-mode app.
  mode: workflow
  name: 文件 ocr 识别
  use_icon_as_answer_icon: false
# Plugins this export declares as dependencies.
dependencies:
- current_identifier: null
  type: marketplace
  value:
    # OpenAI provider plugin pinned to 0.2.3 plus what looks like a content
    # digest; the LLM node's `provider: langgenius/openai/openai` refers to it.
    marketplace_plugin_unique_identifier: langgenius/openai:0.2.3@5a7f82fa86e28332ad51941d0b491c1e8a38ead539656442f7bf4c6129cd15fa
kind: app
# NOTE(review): presumably the DSL schema version rather than the Dify release
# (the surrounding article mentions Dify 1.7.1) - confirm.
version: 0.3.1
# Workflow definition: variables, app-level features, then the node graph.
workflow:
  conversation_variables: []
  environment_variables: []
  features:
    # App-level file upload is switched off (`enabled: false` below). Files
    # reach this workflow through the start node's `file` variable instead.
    # NOTE(review): presumably the limits in this section are therefore
    # unused - confirm.
    file_upload:
      allowed_file_extensions:
      - .JPG
      - .JPEG
      - .PNG
      - .GIF
      - .WEBP
      - .SVG
      allowed_file_types:
      - image
      allowed_file_upload_methods:
      - local_file
      - remote_url
      enabled: false
      # NOTE(review): size limits are presumably in MB - confirm.
      fileUploadConfig:
        audio_file_size_limit: 50
        batch_count_limit: 5
        file_size_limit: 15
        image_file_size_limit: 10
        video_file_size_limit: 100
        workflow_file_upload_limit: 10
      image:
        enabled: false
        number_limits: 3
        transfer_methods:
        - local_file
        - remote_url
      number_limits: 3
    opening_statement: ''
    retriever_resource:
      enabled: true
    sensitive_word_avoidance:
      enabled: false
    speech_to_text:
      enabled: false
    suggested_questions: []
    suggested_questions_after_answer:
      enabled: false
    text_to_speech:
      enabled: false
      language: ''
      voice: ''
  # Node graph. Resulting topology, read from the edges below:
  #   start -> if-else -- 'true'  --> LLM                -> end 2
  #                    -- 'false' --> document extractor -> end
  graph:
    edges:
    # document extractor (1760947522841) -> end (1754657363849)
    - data:
        isInLoop: false
        sourceType: document-extractor
        targetType: end
      id: 1760947522841-source-1754657363849-target
      source: '1760947522841'
      sourceHandle: source
      target: '1754657363849'
      targetHandle: target
      type: custom
      zIndex: 0
    # if-else (1760947763558), handle 'true' -> LLM (1760947801211).
    # 'true' is the `case_id` of the if-else node's only case.
    - data:
        isInIteration: false
        isInLoop: false
        sourceType: if-else
        targetType: llm
      id: 1760947763558-true-1760947801211-target
      source: '1760947763558'
      sourceHandle: 'true'
      target: '1760947801211'
      targetHandle: target
      type: custom
      zIndex: 0
    # LLM (1760947801211) -> end 2 (1760947834434)
    - data:
        isInIteration: false
        isInLoop: false
        sourceType: llm
        targetType: end
      id: 1760947801211-source-1760947834434-target
      source: '1760947801211'
      sourceHandle: source
      target: '1760947834434'
      targetHandle: target
      type: custom
      zIndex: 0
    # if-else (1760947763558), handle 'false' -> document extractor
    # (1760947522841). NOTE(review): 'false' is presumably the else branch,
    # taken when no case matches - confirm.
    - data:
        isInLoop: false
        sourceType: if-else
        targetType: document-extractor
      id: 1760947763558-false-1760947522841-target
      source: '1760947763558'
      sourceHandle: 'false'
      target: '1760947522841'
      targetHandle: target
      type: custom
      zIndex: 0
    # start (1754657032768) -> if-else (1760947763558)
    - data:
        isInLoop: false
        sourceType: start
        targetType: if-else
      id: 1754657032768-source-1760947763558-target
      source: '1754657032768'
      sourceHandle: source
      target: '1760947763558'
      targetHandle: target
      type: custom
      zIndex: 0
    nodes:
    # Start node: declares the single required input `file`.
    - data:
        desc: ''
        selected: false
        title: 开始
        type: start
        variables:
        # `file` is a list of files (`type: file-list`) restricted to the
        # image and document categories; no extension filter is set.
        - allowed_file_extensions: []
          allowed_file_types:
          - image
          - document
          allowed_file_upload_methods:
          - local_file
          - remote_url
          hide: false
          label: file
          # NOTE(review): for a file-list this is presumably the maximum number
          # of files, not a character length - confirm.
          max_length: 5
          options: []
          required: true
          type: file-list
          variable: file
      height: 90
      id: '1754657032768'
      position:
        x: 30
        y: 275
      positionAbsolute:
        x: 30
        y: 275
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # End node of the document branch: exposes the document extractor's `text`
    # output as `ocr_result`.
    - data:
        desc: ''
        outputs:
        - value_selector:
          - '1760947522841'
          - text
          # The extractor below reads a file list, so its `text` output is
          # declared as a list of strings rather than a single string.
          # NOTE(review): presumably one entry per uploaded file - confirm.
          # The image branch's end node still declares a plain string.
          value_type: array[string]
          variable: ocr_result
        selected: false
        title: 结束
        type: end
      height: 90
      id: '1754657363849'
      position:
        x: 1165.9601844704073
        y: 390.25315467340494
      positionAbsolute:
        x: 1165.9601844704073
        y: 390.25315467340494
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # Document extractor: extracts text from the start node's `file` variable.
    - data:
        desc: ''
        # The selected variable is declared as `type: file-list` on the start
        # node, so this flag must say the input is an array of files.
        is_array_file: true
        selected: false
        title: 文档提取器
        type: document-extractor
        variable_selector:
        - '1754657032768'
        - file
      height: 94
      id: '1760947522841'
      position:
        x: 827.5799294442232
        y: 390.25315467340494
      positionAbsolute:
        x: 827.5799294442232
        y: 390.25315467340494
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # If-else node: routes the run depending on the uploaded files' type.
    - data:
        cases:
        # Single case with id 'true': tests the start node's `file` list with
        # `contains` plus a sub-condition requiring `type` to be `in` [image].
        # The edge leaving handle 'true' leads to the LLM node; the edge
        # leaving handle 'false' leads to the document extractor.
        # NOTE(review): with a mixed upload (images and documents together)
        # this presumably matches as soon as one file is an image, sending the
        # whole list to the LLM branch - confirm the semantics of `contains`.
        - case_id: 'true'
          conditions:
          - comparison_operator: contains
            id: de61f5d1-c57c-42b2-b961-c0826e25a2e5
            sub_variable_condition:
              case_id: 30c4d43b-2bcc-44a7-9361-c235989ecfac
              conditions:
              - comparison_operator: in
                id: 80bb51fb-ab74-45d8-8098-1384a7539065
                key: type
                value:
                - image
                varType: string
              logical_operator: and
            value: ''
            varType: array[file]
            variable_selector:
            - '1754657032768'
            - file
          id: 'true'
          logical_operator: and
        desc: ''
        selected: false
        title: 条件分支
        type: if-else
      height: 150
      id: '1760947763558'
      position:
        x: 364
        y: 275
      positionAbsolute:
        x: 364
        y: 275
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # LLM node of the image branch: asks a vision-enabled chat model to read
    # the text in the uploaded files.
    - data:
        context:
          enabled: false
          variable_selector: []
        desc: ''
        model:
          # NOTE(review): non-zero temperature and repetition penalties may
          # work against verbatim transcription (e.g. repeated table cells);
          # presumably chosen because the prompt also asks the model to tidy
          # up the text - confirm the intent.
          completion_params:
            frequency_penalty: 0.3
            presence_penalty: 0.2
            temperature: 0.5
            top_p: 0.85
          mode: chat
          name: gpt-4.1-mini-2025-04-14
          provider: langgenius/openai/openai
        prompt_template:
        # System prompt (Chinese): extract the text content of the uploaded
        # file, polish wording/layout, and return everything in the form
        # "file name: <placeholder>, OCR result: <placeholder>".
        # NOTE(review): the `{{ 占位 }}` / `{{占位}}` markers ("placeholder") are
        # presumably passed to the model as literal text, since they are not
        # `{{#node.variable#}}` references - confirm.
        - id: fcc4af5a-178e-4b6b-84d7-d2c603fa4f8e
          role: system
          text: '根据用户上传的文件进行文字内容提取,然后通过自然语言或排版等优化提取内容,最后返回所有信息。

            返回的内容格式为:

            文件名为:{{ 占位 }},ocr识别结果为:{{占位}}'
        # User prompt (Chinese): "extract the text content of image <file>",
        # where the reference points at the start node's `file` variable.
        - id: 27600332-e624-4d0e-b28a-0cf8631cf6ab
          role: user
          text: 提取图片{{#1754657032768.file#}}文字内容
        selected: false
        title: LLM
        type: llm
        variables: []
        # Vision input is enabled and fed from the start node's `file`
        # variable at `detail: high`.
        vision:
          configs:
            detail: high
            variable_selector:
            - '1754657032768'
            - file
          enabled: true
      height: 90
      id: '1760947801211'
      position:
        x: 851.8571428571428
        y: 190.49999999999997
      positionAbsolute:
        x: 851.8571428571428
        y: 190.49999999999997
      selected: true
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # End node of the image branch: exposes the LLM node's `text` output under
    # the same output name (`ocr_result`) as the document branch's end node.
    - data:
        desc: ''
        outputs:
        - value_selector:
          - '1760947801211'
          - text
          value_type: string
          variable: ocr_result
        selected: false
        title: 结束 2
        type: end
      height: 90
      id: '1760947834434'
      position:
        x: 1189.029421515851
        y: 190.49999999999997
      positionAbsolute:
        x: 1189.029421515851
        y: 190.49999999999997
      selected: false
      sourcePosition: right
      targetPosition: left
      type: custom
      width: 244
    # Saved canvas pan offset and zoom level.
    # NOTE(review): presumably editor-only state with no effect on execution -
    # confirm.
    viewport:
      x: -301.2920975051561
      y: 77.23611359625488
      zoom: 1.1306322140286418
文章目录