77from mapswipe_workers .definitions import (
88 OHSOME_API_LINK ,
99 OSM_API_LINK ,
10+ OSMCHA_API_KEY ,
11+ OSMCHA_API_LINK ,
1012 CustomError ,
1113 logger ,
1214)
@@ -22,12 +24,16 @@ def remove_troublesome_chars(string: str):
2224 return string
2325
2426
def retry_get(url, retries=3, timeout=4, to_osmcha: bool = False):
    """Send a GET request to ``url``, retrying a limited number of times.

    Args:
        url: Address to query. The retry policy is only mounted for
            ``https://`` URLs.
        retries: Total number of retries handed to ``Retry``.
        timeout: Timeout value forwarded to ``session.get``.
        to_osmcha: When True, the request carries the OSMCha API token in
            an ``Authorization`` header.

    Returns:
        The response object returned by ``session.get``.
    """
    # Only OSMCha requests are authenticated; ``None`` sends no extra headers.
    # The header value must be exactly "Token <key>" without padding.
    headers = {"Authorization": f"Token {OSMCHA_API_KEY}"} if to_osmcha else None

    retry = Retry(total=retries)
    with requests.Session() as session:
        session.mount("https://", HTTPAdapter(max_retries=retry))
        return session.get(url, timeout=timeout, headers=headers)
3137
3238
3339def geojsonToFeatureCollection (geojson : dict ) -> dict :
@@ -49,6 +55,29 @@ def chunks(arr, n_objects):
4955 ]
5056
5157
def query_osmcha(changeset_ids: list, changeset_results):
    """Fetch changeset metadata from OSMCha and store it in ``changeset_results``.

    Args:
        changeset_ids: Changeset ids to look up in a single request.
        changeset_results: Dict keyed by integer changeset id. It is updated
            in place with one entry per feature in the OSMCha response.

    Returns:
        The same ``changeset_results`` dict. Ids that are absent from the
        response are not written.

    Raises:
        CustomError: If OSMCha answers with a status code other than 200.
    """
    # Nothing to look up: avoid sending an unfiltered "?ids=" request.
    if not changeset_ids:
        return changeset_results

    id_string = ",".join(map(str, changeset_ids))

    # NOTE(review): OSMCha list endpoints may paginate - confirm the page
    # size covers a full batch of ids.
    url = OSMCHA_API_LINK + f"changesets/?ids={id_string}"
    response = retry_get(url, to_osmcha=True)
    if response.status_code != 200:
        err = f"osmcha request failed: {response.status_code}"
        logger.warning(err)
        # Error bodies are not guaranteed to be JSON (e.g. an HTML gateway
        # page); fall back to the raw text so the decode failure does not
        # mask the CustomError raised below.
        try:
            logger.warning(response.json())
        except ValueError:
            logger.warning(response.text)
        raise CustomError(err)

    payload = response.json()
    for feature in payload["features"]:
        properties = feature["properties"]
        changeset_results[int(feature["id"])] = {
            "username": remove_troublesome_chars(properties["user"]),
            "userid": properties["uid"],
            "comment": remove_troublesome_chars(properties["comment"]),
            "editor": remove_troublesome_chars(properties["editor"]),
        }

    return changeset_results
79+
80+
5281def query_osm (changeset_ids : list , changeset_results ):
5382 """Get data from changesetId."""
5483 id_string = "," .join (map (str , changeset_ids ))
@@ -77,14 +106,17 @@ def query_osm(changeset_ids: list, changeset_results):
77106 "username" : remove_troublesome_chars (username ),
78107 "userid" : userid ,
79108 "comment" : remove_troublesome_chars (comment ),
80- "created_by " : remove_troublesome_chars (created_by ),
109+ "editor " : remove_troublesome_chars (created_by ),
81110 }
82111 return changeset_results
83112
84113
85114def remove_noise_and_add_user_info (json : dict ) -> dict :
86115 """Delete unwanted information from properties."""
87116 logger .info ("starting filtering and adding extra info" )
117+ batch_size = 100
118+
119+ # remove noise
88120 changeset_results = {}
89121
90122 missing_rows = {
@@ -106,21 +138,37 @@ def remove_noise_and_add_user_info(json: dict) -> dict:
106138 changeset_results [new_properties ["changesetId" ]] = None
107139 feature ["properties" ] = new_properties
108140
141+ # add info
109142 len_osm = len (changeset_results .keys ())
110- batches = int (len (changeset_results .keys ()) / 100 ) + 1
143+ batches = int (len (changeset_results .keys ()) / batch_size ) + 1
144+ logger .info (
145+ f"""{ len_osm } changesets will be queried in roughly { batches } batches from osmCHA""" # noqa E501
146+ )
147+
148+ chunk_list = chunks (list (changeset_results .keys ()), batch_size )
149+ for i , subset in enumerate (chunk_list ):
150+ changeset_results = query_osmcha (subset , changeset_results )
151+ progress = round (100 * ((i + 1 ) / len (chunk_list )), 1 )
152+ logger .info (f"finished query { i + 1 } /{ len (chunk_list )} , { progress } " )
153+
154+ missing_ids = [i for i , v in changeset_results .items () if v is None ]
155+ chunk_list = chunks (missing_ids , batch_size )
156+ batches = int (len (missing_ids ) / batch_size ) + 1
111157 logger .info (
112- f"""{ len_osm } changesets will be queried in roughly { batches } batches"""
158+ f"""{ len ( missing_ids ) } changesets where missing from osmCHA and are now queried via osmAPI in { batches } batches""" # noqa E501
113159 )
114- chunk_list = chunks (list (changeset_results .keys ()), 100 )
115160 for i , subset in enumerate (chunk_list ):
116161 changeset_results = query_osm (subset , changeset_results )
117162 progress = round (100 * ((i + 1 ) / len (chunk_list )), 1 )
118163 logger .info (f"finished query { i + 1 } /{ len (chunk_list )} , { progress } " )
119164
120165 for feature in json ["features" ]:
121- changeset = changeset_results [feature ["properties" ]["changesetId" ]]
122- for attribute_name in ["username" , "comment" , "created_by" , "userid" ]:
123- feature ["properties" ][attribute_name ] = changeset [attribute_name ]
166+ changeset = changeset_results [int (feature ["properties" ]["changesetId" ])]
167+ for attribute_name in ["username" , "comment" , "editor" , "userid" ]:
168+ if attribute_name == "userid" :
169+ feature ["properties" ][attribute_name ] = int (changeset [attribute_name ])
170+ else :
171+ feature ["properties" ][attribute_name ] = changeset [attribute_name ]
124172
125173 logger .info ("finished filtering and adding extra info" )
126174 if any (x > 0 for x in missing_rows .values ()):
0 commit comments