update data flow diagram
This commit is contained in:
parent
176a28e1a1
commit
338a06e583
@ -46,41 +46,33 @@ An **S3-compatible storage provider** (e.g., MinIO) used to store and retrieve u
|
||||
## Workflow
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
flowchart TB
|
||||
|
||||
%% STAGE 1: DATA SOURCES
|
||||
A["Data Sources
|
||||
(S3 / MinIO, DBs, APIs)"] -->|Ingestion Jobs| B[Apache Airflow]
|
||||
subgraph src ["Data sources"]
|
||||
direction LR
|
||||
ext_api[/"API<br>(HTTP, REST, Graph)"/]
|
||||
ext_s3@{ shape: cyl, label: "Object Storage<br>(S3, MinIO, GCS)" }
|
||||
ext_db@{ shape: cyl, label: "Database<br>(MySQL, PostgreSQL)" }
|
||||
ext_fs@{ shape: cyl, label: "Filesystem<br>(HDFS, NAS)" }
|
||||
end
|
||||
|
||||
%% STAGE 2: RAW STORAGE
|
||||
B -->|Store Raw Data| C["Raw Zone
|
||||
(S3 / MinIO)"]
|
||||
subgraph emgr ["Data Platform"]
|
||||
dag@{ shape: docs, label: "Python DAG" }
|
||||
af["Airflow"]
|
||||
tr["Trino"]
|
||||
ss("Superset")
|
||||
end
|
||||
|
||||
%% STAGE 3: TRANSFORMATION
|
||||
C -->|DAG / ETL / SQL Queries| D["Trino
|
||||
(Query Engine)"]
|
||||
B -->|Workflow Orchestration| D
|
||||
s3@{ shape: cyl, label: "S3<br>(MinIO)" }
|
||||
|
||||
%% STAGE 4: PROCESSED STORAGE
|
||||
D -->|Write Processed Data| E["Processed / Curated Zone
|
||||
(S3 / MinIO)"]
|
||||
dag -- (1a)<br>Fetch<br>raw data<br>(API, SDK) --> src
|
||||
dag -- (1b) --> tr
|
||||
tr -- (1b)<br>Fetch<br>raw data<br>(Trino connector) --> src
|
||||
af -- (2)<br>Execute<br>script --> dag
|
||||
dag -- (3)<br>Store<br>processed<br>data<br>(SQL) --> tr
|
||||
s3 <-- (4)<br>Read/write data<br>(Hive / Iceberg format) --> tr
|
||||
ss -- (5)<br>Query<br>processed<br>data<br>(SQL) --> tr
|
||||
|
||||
%% STAGE 5: QUERY LAYER
|
||||
E -->|Query Interface| F["Trino
|
||||
(SQL Access Layer)"]
|
||||
|
||||
%% STAGE 6: VISUALIZATION
|
||||
F -->|Data Access| G["Apache Superset
|
||||
(Dashboarding & Analytics)"]
|
||||
|
||||
%% LABELS
|
||||
classDef core fill:#4a90e2,stroke:#2c3e50,stroke-width:1px,color:white;
|
||||
classDef storage fill:#6dbf4b,stroke:#2c3e50,stroke-width:1px,color:white;
|
||||
classDef optional fill:#aaaaaa,stroke:#333,stroke-width:0.5px,color:white;
|
||||
|
||||
class B,D,F,G core;
|
||||
class C,E storage;
|
||||
class H1,H2,H3,H4 optional;
|
||||
```
|
||||
|
||||
## Data Pipeline
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user