1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
|
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Creates a server to offload non-critical-path GN targets."""
from __future__ import annotations
import argparse
import json
import os
import queue
import shutil
import socket
import subprocess
import sys
import threading
from typing import Callable, Dict, List, Optional, Tuple
sys.path.append(os.path.join(os.path.dirname(__file__), 'gyp'))
from util import server_utils
def log(msg: str, *, end: str = ''):
  """Prints a one-line status message prefixed with the TaskStats summary.

  The line starts with a carriage return and ends by erasing to the end of the
  line, so consecutive calls overwrite each other just like ninja's output.

  Args:
    msg: Text to show. When it does not fit in the terminal next to the
      prefix it is shortened to its first 2 characters, an ellipsis and as
      much of its tail as fits.
    end: Line ending handed to print(). The default of '' keeps the cursor on
      the same line so the next message overwrites this one.
  """
  # Shrink the message (leaving a 2-char prefix and use the rest of the room
  # for the suffix) according to terminal size so it is always one line.
  width = shutil.get_terminal_size().columns
  prefix = f'[{TaskStats.prefix()}] '
  max_msg_width = width - len(prefix)
  if len(msg) > max_msg_width:
    length_to_show = max_msg_width - 5  # Account for ellipsis and header.
    # msg[-0:] is the whole message and a negative count slices from the
    # front, so on a very narrow terminal there is no room for a suffix.
    suffix = msg[-length_to_show:] if length_to_show > 0 else ''
    msg = f'{msg[:2]}...{suffix}'
  # \r to return the carriage to the beginning of line.
  # \033[K to replace the normal \n to erase until the end of the line.
  # Avoid the default line ending so the next \r overwrites the same line just
  # like ninja's output.
  print(f'\r{prefix}{msg}\033[K', end=end, flush=True)
class TaskStats:
  """Aggregate counters for all tasks, shared across threads.

  The counters are class attributes, so the class is used directly and never
  instantiated.
  """
  # Incremented by add_process() and decremented by remove_process().
  _num_processes = 0
  # Incremented by complete_task().
  _completed_tasks = 0
  # Incremented by add_task().
  _total_tasks = 0
  # Held by add_process, remove_process, complete_task and prefix.
  _lock = threading.Lock()

  @classmethod
  def no_running_processes(cls):
    """Returns True when the running-process counter is zero."""
    return not cls._num_processes

  @classmethod
  def add_task(cls):
    """Records that one more task has been queued."""
    # No lock is taken here because only the main thread calls this method.
    cls._total_tasks += 1

  @classmethod
  def add_process(cls):
    """Records that one more process is running."""
    with cls._lock:
      cls._num_processes += 1

  @classmethod
  def remove_process(cls):
    """Records that one process is no longer running."""
    with cls._lock:
      cls._num_processes -= 1

  @classmethod
  def complete_task(cls):
    """Records that one more task has completed."""
    with cls._lock:
      cls._completed_tasks += 1

  @classmethod
  def prefix(cls):
    """Returns a status summary such as '2 processes, 6/734'."""
    # Ninja's prefix is: [205 processes, 6/734 @ 6.5/s : 0.922s ]
    # Time taken and task completion rate are left out: the build server is
    # always running in the background and uses idle priority for its tasks,
    # so neither number is important.
    with cls._lock:
      running = cls._num_processes
      done = cls._completed_tasks
      total = cls._total_tasks
    noun = 'process' if running == 1 else 'processes'
    return f'{running} {noun}, {done}/{total}'
class TaskManager:
  """Class to encapsulate a threadsafe queue and handle deactivating it.

  Tasks are queued with add_task() and started by _maybe_start_tasks(), which
  runs when a task is added and again as the completion callback of every
  task it starts.
  """

  def __init__(self):
    self._queue: queue.SimpleQueue[Task] = queue.SimpleQueue()
    self._deactivated = False

  def add_task(self, task: Task):
    """Queues |task| and starts queued tasks if the current load allows it.

    Must not be called after deactivate().
    """
    assert not self._deactivated
    TaskStats.add_task()
    self._queue.put(task)
    log(f'QUEUED {task.name}')
    self._maybe_start_tasks()

  def deactivate(self):
    """Stops starting new tasks and terminates every task still queued."""
    self._deactivated = True
    while not self._queue.empty():
      try:
        task = self._queue.get_nowait()
      except queue.Empty:
        # Another thread drained the queue between empty() and get_nowait().
        return
      task.terminate()

  @staticmethod
  def _num_running_processes():
    """Returns the procs_running value read from /proc/stat."""
    with open('/proc/stat') as f:
      for line in f:
        if line.startswith('procs_running'):
          return int(line.rstrip().split()[1])
    assert False, 'Could not read /proc/stat'

  def _maybe_start_tasks(self):
    """Starts up to 2 queued tasks, depending on the current system load."""
    if self._deactivated:
      return
    # Include load avg so that a small dip in the number of currently running
    # processes will not cause new tasks to be started while the overall load is
    # heavy.
    cur_load = max(self._num_running_processes(), os.getloadavg()[0])
    # os.cpu_count() returns None when the count cannot be determined, which
    # would make the comparison below raise TypeError. Fall back to a single
    # CPU, and look the value up once instead of on every loop iteration.
    num_cpus = os.cpu_count() or 1
    num_started = 0
    # Always start a task if we don't have any running, so that all tasks are
    # eventually finished. Try starting up tasks when the overall load is light.
    # Limit to at most 2 new tasks to prevent ramping up too fast. There is a
    # chance where multiple threads call _maybe_start_tasks and each gets to
    # spawn up to 2 new tasks, but since the only downside is some build tasks
    # get worked on earlier rather than later, it is not worth mitigating.
    while num_started < 2 and (TaskStats.no_running_processes()
                               or num_started + cur_load < num_cpus):
      try:
        next_task = self._queue.get_nowait()
      except queue.Empty:
        return
      # start() returns 0 for a task that was already terminated, so such
      # tasks do not count towards the limit of 2.
      num_started += next_task.start(self._maybe_start_tasks)
# TODO(wnwen): Break this into Request (encapsulating what ninja sends) and Task
# when a Request starts to be run. This would eliminate ambiguity
# about when and whether _proc/_thread are initialized.
class Task:
  """Class to represent one task and operations on it.

  A task is started at most once (see start) and can be terminated at any
  time, before or after it has been started.

  Attributes:
    name: Name of the task; shown in log output and part of key.
    cwd: Directory the command runs in; also part of key.
    cmd: Command line handed to subprocess.Popen.
    stamp_file: Path, joined onto cwd, of the file that is deleted when the
      task fails or is terminated.
  """

  def __init__(self, name: str, cwd: str, cmd: List[str], stamp_file: str):
    self.name = name
    self.cwd = cwd
    self.cmd = cmd
    self.stamp_file = stamp_file
    self._terminated = False
    # Guards _terminated, and the assignment of _proc and _thread in start().
    self._lock = threading.Lock()
    self._proc: Optional[subprocess.Popen] = None
    self._thread: Optional[threading.Thread] = None
    self._return_code: Optional[int] = None

  @property
  def key(self):
    """The (cwd, name) tuple that identifies this task."""
    return (self.cwd, self.name)

  def start(self, on_complete_callback: Callable[[], None]) -> int:
    """Starts the task if it has not already been terminated.

    This is called at most once when the task is popped off the task queue.

    Args:
      on_complete_callback: Called with no arguments, on the task's own
        thread, after the process has exited and the task has completed.

    Returns:
      The number of processes that have been started: 1, or 0 if the task was
      terminated before it could start.

    Raises:
      Whatever subprocess.Popen raises if the process cannot be spawned.
    """
    # The environment variable forces the script to actually run in order to
    # avoid infinite recursion.
    env = os.environ.copy()
    env[server_utils.BUILD_SERVER_ENV_VARIABLE] = '1'

    with self._lock:
      if self._terminated:
        return 0
      # Use os.nice(19) to ensure the lowest priority (idle) for these analysis
      # tasks since we want to avoid slowing down the actual build.
      # TODO(wnwen): Use ionice to reduce resource consumption.
      # NOTE(review): the subprocess documentation warns that preexec_fn is
      # not safe in the presence of threads, and this server uses threads.
      TaskStats.add_process()
      log(f'STARTING {self.name}')
      try:
        self._proc = subprocess.Popen(
            self.cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=self.cwd,
            env=env,
            text=True,
            preexec_fn=lambda: os.nice(19),
        )
      except Exception:
        # Nothing was spawned, so undo add_process(). Otherwise the count of
        # running processes would stay inflated for the rest of the session.
        TaskStats.remove_process()
        raise
      self._thread = threading.Thread(
          target=self._complete_when_process_finishes,
          args=(on_complete_callback, ))
      self._thread.start()
      return 1

  def terminate(self):
    """Can be called multiple times to cancel and ignore the task's output."""
    with self._lock:
      if self._terminated:
        return
      self._terminated = True
    # It is safe to access _proc and _thread outside of _lock since they are
    # only changed by self.start holding _lock when self._terminated is false.
    # Since we have just set self._terminated to true inside of _lock, we know
    # that neither _proc nor _thread will be changed from this point onwards.
    if self._proc:
      self._proc.terminate()
      self._proc.wait()
    # Ensure that self._complete is called either by the thread or by us.
    if self._thread:
      self._thread.join()
    else:
      self._complete()

  def _complete_when_process_finishes(self,
                                      on_complete_callback: Callable[[], None]):
    """Thread target: waits for the process to exit, then completes the task."""
    assert self._proc
    # We know Popen.communicate will return a str and not a byte since it is
    # constructed with text=True.
    stdout: str = self._proc.communicate()[0]
    self._return_code = self._proc.returncode
    TaskStats.remove_process()
    self._complete(stdout)
    on_complete_callback()

  def _complete(self, stdout: str = ''):
    """Update the user and ninja after the task has run or been terminated.

    This method should only be run once per task. Avoid modifying the task so
    that this method does not need locking.

    Args:
      stdout: Combined stdout and stderr of the process. Any output, like a
        non-zero return code, marks the task as failed.
    """
    TaskStats.complete_task()
    failed = False
    if self._terminated:
      log(f'TERMINATED {self.name}')
      # Ignore stdout as it is now outdated.
      failed = True
    else:
      log(f'FINISHED {self.name}')
      if stdout or self._return_code != 0:
        failed = True
        # An extra new line is needed since we want to preserve the previous
        # _log line. Use a single print so that it is threadsafe.
        # TODO(wnwen): Improve stdout display by parsing over it and moving the
        #              actual error to the bottom. Otherwise long command lines
        #              in the Traceback section obscure the actual error(s).
        print('\n' + '\n'.join([
            f'FAILED: {self.name}',
            f'Return code: {self._return_code}',
            ' '.join(self.cmd),
            stdout,
        ]))

    if failed:
      # Force ninja to consider failed targets as dirty.
      try:
        os.unlink(os.path.join(self.cwd, self.stamp_file))
      except FileNotFoundError:
        pass
    # On success there is nothing to do. Ninja will rebuild targets when their
    # inputs change even if their stamp file has a later modified time. Thus
    # we do not need to worry about the script being run by the build server
    # updating the mtime incorrectly.
def _listen_for_request_data(sock: socket.socket):
  """Yields the parsed JSON payload of every non-empty incoming connection.

  Each accepted connection is read until the peer closes it, and the
  connection is closed before its payload is yielded. Connections that send
  no bytes yield nothing. The generator never finishes on its own.

  Args:
    sock: A listening socket to accept connections on.
  """
  while True:
    conn, _ = sock.accept()
    with conn:
      # recv() returns b'' once the peer has closed its end, which ends the
      # iteration.
      chunks = list(iter(lambda: conn.recv(4096), b''))
    if chunks:
      yield json.loads(b''.join(chunks))
def _process_requests(sock: socket.socket):
  """Turns incoming requests into tasks until interrupted from the keyboard.

  A request whose (cwd, name) key matches an earlier task terminates that
  earlier task before the new one is queued. On KeyboardInterrupt all queued
  and all known tasks are terminated.

  Args:
    sock: A listening socket that requests arrive on.
  """
  # Dicts in python can hold anything, so spell out the types to make static
  # type checking more useful.
  latest_by_key: Dict[Tuple[str, str], Task] = {}
  manager = TaskManager()
  try:
    for request in _listen_for_request_data(sock):
      new_task = Task(name=request['name'],
                      cwd=request['cwd'],
                      cmd=request['cmd'],
                      stamp_file=request['stamp_file'])
      superseded = latest_by_key.get(new_task.key)
      if superseded:
        superseded.terminate()
      latest_by_key[new_task.key] = new_task
      manager.add_task(new_task)
  except KeyboardInterrupt:
    log('STOPPING SERVER...', end='\n')
    # Shut the task manager down gracefully; this terminates queued tasks.
    manager.deactivate()
    # Then terminate whatever is currently running.
    for known_task in latest_by_key.values():
      known_task.terminate()
    log('STOPPED', end='\n')
def main():
  """Binds the server socket and processes requests until interrupted."""
  # No flags are defined; parsing still provides --help and rejects
  # unexpected arguments.
  argparse.ArgumentParser(description=__doc__).parse_args()
  with socket.socket(socket.AF_UNIX) as server_sock:
    server_sock.bind(server_utils.SOCKET_ADDRESS)
    server_sock.listen()
    _process_requests(server_sock)


if __name__ == '__main__':
  sys.exit(main())
|