update data flow diagram
This commit is contained in:
parent
176a28e1a1
commit
338a06e583
@ -46,41 +46,33 @@ An **S3-compatible storage provider** (e.g., MinIO) used to store and retrieve u
|
|||||||
## Workflow
|
## Workflow
|
||||||
|
|
||||||
```mermaid
|
```mermaid
|
||||||
flowchart TD
|
flowchart TB
|
||||||
|
|
||||||
%% STAGE 1: DATA SOURCES
|
subgraph src ["Data source"]
|
||||||
A["Data Sources
|
direction LR
|
||||||
(S3 / MinIO, DBs, APIs)"] -->|Ingestion Jobs| B[Apache Airflow]
|
ext_api[/"API<br>(HTTP, REST, Graph)"/]
|
||||||
|
ext_s3@{ shape: cyl, label: "Object Storage<br>(S3, MinIO, GCS)" }
|
||||||
|
ext_db@{ shape: cyl, label: "Database<br>(MySQL, PostgreSQL)" }
|
||||||
|
ext_fs@{ shape: cyl, label: "Filesystem<br>(HDFS, NAS)" }
|
||||||
|
end
|
||||||
|
|
||||||
%% STAGE 2: RAW STORAGE
|
subgraph emgr ["Data Platform"]
|
||||||
B -->|Store Raw Data| C["Raw Zone
|
dag@{ shape: docs, label: "Python DAG" }
|
||||||
(S3 / MinIO)"]
|
af["Airflow"]
|
||||||
|
tr["Trino"]
|
||||||
|
ss("Superset")
|
||||||
|
end
|
||||||
|
|
||||||
%% STAGE 3: TRANSFORMATION
|
s3@{ shape: cyl, label: "S3<br>(MinIO)" }
|
||||||
C -->|DAG / ETL / SQL Queries| D["Trino
|
|
||||||
(Query Engine)"]
|
|
||||||
B -->|Workflow Orchestration| D
|
|
||||||
|
|
||||||
%% STAGE 4: PROCESSED STORAGE
|
dag -- (1a)<br>Fetch<br>raw data<br>(API, SDK) --> src
|
||||||
D -->|Write Processed Data| E["Processed / Curated Zone
|
dag -- (1b) --> tr
|
||||||
(S3 / MinIO)"]
|
tr -- (1b)<br>Fetch<br>raw data<br>(Trino connector) --> src
|
||||||
|
af -- (2)<br>Execute<br>script --> dag
|
||||||
|
dag -- (3)<br>Store<br>processed<br>data<br>(SQL) --> tr
|
||||||
|
s3 <-- (4)<br>Read/write data<br>(Hive / Iceberg format) --> tr
|
||||||
|
ss -- (5)<br>Query<br>processed<br>data<br>(SQL) --> tr
|
||||||
|
|
||||||
%% STAGE 5: QUERY LAYER
|
|
||||||
E -->|Query Interface| F["Trino
|
|
||||||
(SQL Access Layer)"]
|
|
||||||
|
|
||||||
%% STAGE 6: VISUALIZATION
|
|
||||||
F -->|Data Access| G["Apache Superset
|
|
||||||
(Dashboarding & Analytics)"]
|
|
||||||
|
|
||||||
%% LABELS
|
|
||||||
classDef core fill:#4a90e2,stroke:#2c3e50,stroke-width:1px,color:white;
|
|
||||||
classDef storage fill:#6dbf4b,stroke:#2c3e50,stroke-width:1px,color:white;
|
|
||||||
classDef optional fill:#aaaaaa,stroke:#333,stroke-width:0.5px,color:white;
|
|
||||||
|
|
||||||
class B,D,F,G core;
|
|
||||||
class C,E storage;
|
|
||||||
class H1,H2,H3,H4 optional;
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Data Pipeline
|
## Data Pipeline
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user