
Code 1

import re

import apache_beam as beam

# CASINO_RETAILER_NAME, SCHEMA, VentesTargetOutput, CSVReader,
# preprocessing_common, NORMALIZED_COLUMNS and sum_values come from the
# project's own modules; their imports are not shown in this document.


class RunPipelineCasinoVentes(RunPipeline):
    retailer_name = CASINO_RETAILER_NAME
    schema_input = SCHEMA

    def processing_data(self, element):
        # The same dict serves as both the preprocessing and the processing
        # row here, so it is deliberately passed twice.
        element = self.add_technical_fields_processing(element, element)

        # Recode "typologie": EANs starting with 322247 or 360282 are always
        # treated as private label ("MDD"); otherwise "2" maps to national
        # brand ("MN") and "1" to "MDD". Anything else becomes None.
        typologie = element[VentesTargetOutput.typologie].strip()
        is_regex = re.search(r"^(322247|360282).+", element[VentesTargetOutput.ean])
        if not is_regex and typologie == "2":
            element[VentesTargetOutput.typologie] = "MN"
        elif not is_regex and typologie == "1":
            element[VentesTargetOutput.typologie] = "MDD"
        elif is_regex and typologie in ("0", "1"):
            element[VentesTargetOutput.typologie] = "MDD"
        else:
            element[VentesTargetOutput.typologie] = None

        # Normalize the supplier code: strip whitespace, force the "S"
        # prefix, then truncate to 7 characters.
        PREFIX_S = "S"
        code_fournisseur = element[VentesTargetOutput.code_fournisseur]
        if code_fournisseur is not None:
            code_fournisseur = code_fournisseur.strip()
            if not code_fournisseur.startswith(PREFIX_S):
                code_fournisseur = PREFIX_S + code_fournisseur
            element[VentesTargetOutput.code_fournisseur] = code_fournisseur[:7]
        return element

    @staticmethod
    def preprocessing_data(element, schema_input):
        return preprocessing_common(element, schema_input)

    def get_reader(self):
        return CSVReader(encoding=self.encoding)

    def apply_processing_data(self, preprocessed_data, pipeline):
        # Dimensions: one output row per distinct combination of these keys.
        keys = (
            VentesTargetOutput.annee_mois,
            VentesTargetOutput.ean,
            VentesTargetOutput.code_fournisseur,
            VentesTargetOutput.nom_fournisseur,
            VentesTargetOutput.promo,
            VentesTargetOutput.code_interne,
            VentesTargetOutput.code_ue,
            VentesTargetOutput.code_famille,
            VentesTargetOutput.libelle_pdt,
            VentesTargetOutput.typologie,
        )
        # Measures summed within each group.
        fields_to_sum = (
            VentesTargetOutput.ca_ventes_ht,
            VentesTargetOutput.ca_ventes_ttc,
            VentesTargetOutput.ca_srp,
            VentesTargetOutput.volume,
        )
        individual_fields = VentesTargetOutput().get_technical_fields_preprocessing()
        processing = (
            preprocessed_data
            | NORMALIZED_COLUMNS
            >> beam.Map(
                self.get_normalized_columns,
                schema_input=self.schema_input,
                individual_fields=individual_fields,
            )
            | sum_values(keys, fields_to_sum, individual_fields)
            | "processing" >> beam.Map(self.processing_data)
        )
        return processing
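
The sum_values composite used above is not shown in this document. Below is a minimal sketch of what it could look like, assuming it groups rows on the key columns and sums the measure columns, carrying every other field (including the technical individual_fields) through from the first row of each group; the step names and exact semantics are assumptions:

import apache_beam as beam

@beam.ptransform_fn
def sum_values(pcoll, keys, fields_to_sum, individual_fields):
    # individual_fields are already present on every row; keeping the first
    # row of each group carries them through implicitly.
    def merge(kv):
        _, rows = kv
        rows = list(rows)
        merged = dict(rows[0])  # key columns + technical fields
        for field in fields_to_sum:
            merged[field] = sum(row[field] or 0 for row in rows)
        return merged

    return (
        pcoll
        | "KeyByDimensions" >> beam.Map(lambda row: (tuple(row[k] for k in keys), row))
        | "GroupByDimensions" >> beam.GroupByKey()
        | "SumMeasures" >> beam.Map(merge)
    )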

from datetime import datetime
from typing import Any, Dict

import apache_beam as beam
from apache_beam.io.gcp.bigquery import WriteToBigQuery
from apache_beam.options.pipeline_options import PipelineOptions

# Table, ENSEIGNE, NORMALIZED_COL_NAME, check_file_exists and
# delete_and_return_file_path come from the project's own modules;
# their imports are not shown in this document.


class RunPipeline:
    retailer_name = None
    is_processing = True
    schema_input = None

    def __init__(
        self,
        input_data: str,
        options: PipelineOptions,
        date_file: str,
        debug_mode: bool,
        encoding: str,
        table_name_preprocessing: str,
        table_name_processing: str,
    ):
        self.input_data = input_data
        self.options = options
        self.date_file = date_file
        self.debug_mode = debug_mode
        self.encoding = encoding
        self.table_name_preprocessing = table_name_preprocessing
        self.table_name_processing = table_name_processing
        # Inputs on GCS are treated as files; anything else is not.
        self.is_file = self.input_data.startswith("gs://")

    def keep_idempotence(self, pipeline):
        # Seed the pipeline with the input path, optionally check that the
        # file exists, then delete any rows previously written for this
        # date_file so that re-running the pipeline never duplicates data.
        empty_pcollection = pipeline | "CreateEmptyPBegin" >> beam.Create([self.input_data])
        if self.is_file:
            empty_pcollection = empty_pcollection | "CheckFileExists" >> beam.Map(check_file_exists)

        file_path = empty_pcollection | "DeleteAndReturnFilePath" >> beam.Map(
            delete_and_return_file_path,
            table_preprocessing=self.table_name_preprocessing,
            table_processing=self.table_name_processing,
            column_name_preprocessing=Table.date_file,
            column_name_processing=Table.execution_date,
            date_file=self.date_file,
            is_processing=self.is_processing,
        )
        return file_path

    def write_processing(self, processed_data):
        # In debug mode, print rows instead of writing them to BigQuery.
        if self.debug_mode:
            processed_data | "Printing before WriteToBigQuery Processing" >> beam.Map(print)
        else:
            processed_data | "WriteProcessing" >> WriteToBigQuery(
                table=self.table_name_processing,
                create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
            )

    def write_preprocessing(self, preprocessed_data):
        if self.debug_mode:
            preprocessed_data | "Printing before WriteToBigQuery Preprocessing" >> beam.Map(print)
        else:
            preprocessed_data | "WritePreprocessing" >> WriteToBigQuery(
                table=self.table_name_preprocessing,
                create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
                write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
            )

    @staticmethod
    def add_technical_fields_processing(
        element_processing: Dict[str, Any],
        element_preprocessing: Dict[str, Any],
    ) -> Dict[str, Any]:
        # Copy the technical columns from the preprocessing row onto the
        # processing row, renaming source_file/date_file along the way.
        element_processing[Table.processing_ts] = datetime.utcnow()
        element_processing[Table.preprocessing_ts] = element_preprocessing[Table.preprocessing_ts].isoformat()
        element_processing[Table.source_files] = element_preprocessing.pop(Table.source_file)
        element_processing[Table.execution_date] = element_preprocessing.pop(Table.date_file)
        element_processing[Table.retailer] = element_preprocessing[Table.retailer]
        return element_processing

    def add_technical_fields_preprocessing(
        self,
        element: Dict[str, Any],
        source_file: str,
        date_file: str,
        retailer_name: Any,
    ) -> Dict[str, Any]:
        element[Table.preprocessing_ts] = datetime.utcnow()
        element[Table.source_file] = source_file
        element[Table.date_file] = date_file
        # retailer_name may be a plain name or a mapping keyed by the raw
        # "enseigne" (banner) column; unmatched banners fall back to "null".
        if isinstance(retailer_name, dict):
            element[Table.retailer] = next(
                (value for key, value in retailer_name.items() if element[ENSEIGNE] == key),
                "null",
            )
        else:
            element[Table.retailer] = retailer_name
        return element

    @staticmethod
    def processing_data(element):
        # Identity by default; subclasses override with retailer-specific logic.
        return element

    def apply_processing_data(self, preprocessed_data, pipeline):
        return preprocessed_data | "processing" >> beam.Map(self.processing_data)

    @staticmethod
    def preprocessing_data(element: Dict[str, Any], schema_input) -> Dict[str, Any]:
        return element

    def apply_preprocessing_data(self, reader_data):
        return (
            reader_data
            | "preprocessing" >> beam.Map(self.preprocessing_data, schema_input=self.schema_input)
            | "TechnicalFieldsPreprocessing"
            >> beam.Map(
                self.add_technical_fields_preprocessing,
                source_file=self.input_data,
                date_file=self.date_file,
                retailer_name=self.retailer_name,
            )
        )

    def get_reader(self):
        # Subclasses must return a DoFn that reads and parses the input file.
        raise NotImplementedError

    @staticmethod
    def get_normalized_columns(
        element,
        schema_input,
        individual_fields=(),  # tuple, not list: avoids the mutable-default pitfall
    ):
        # Rename raw input columns to their normalized names and carry the
        # technical fields through untouched.
        result = {}
        for k, v in schema_input.items():
            if NORMALIZED_COL_NAME in v.keys():
                result[v[NORMALIZED_COL_NAME]] = element[k]
        for k in individual_fields:
            result[k] = element[k]
        return result

    def run(self) -> None:
        with beam.Pipeline(options=self.options) as pipeline:
            file_path = self.keep_idempotence(pipeline)

            reader_data = file_path | "ReadAndParse" >> beam.ParDo(self.get_reader())

            preprocessed_data = self.apply_preprocessing_data(reader_data)

            self.write_preprocessing(preprocessed_data)

            if self.is_processing:
                processed_data = self.apply_processing_data(preprocessed_data, pipeline=pipeline)

                self.write_processing(processed_data)

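Finally, a minimal, hypothetical invocation of the Casino pipeline; the argument values are illustrative and the real entry point is not part of this document:

from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions(runner="DirectRunner", project="my-gcp-project")

RunPipelineCasinoVentes(
    input_data="gs://my-bucket/casino/ventes_202405.csv",
    options=options,
    date_file="2024-05-01",
    debug_mode=True,  # print rows instead of writing to BigQuery
    encoding="utf-8",
    table_name_preprocessing="my_dataset.ventes_preprocessing",
    table_name_processing="my_dataset.ventes_processing",
).run()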