2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/* Expands to the byte offset of a per-vcpu statistics counter inside
 * struct kvm_vcpu, paired with the KVM_STAT_VCPU type tag expected by
 * struct kvm_stats_debugfs_item. */
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * debugfs statistics table: one entry per exported counter, mapping the
 * debugfs file name to its counter offset via VCPU_STAT above.
 * NOTE(review): the table's terminating sentinel entry and closing brace
 * are not visible in this chunk -- they appear to have been lost in
 * extraction; confirm against the full file.
 */
32 struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_validity", VCPU_STAT(exit_validity) },
35 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
36 { "exit_external_request", VCPU_STAT(exit_external_request) },
37 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
38 { "exit_instruction", VCPU_STAT(exit_instruction) },
39 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
40 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
41 { "instruction_lctg", VCPU_STAT(instruction_lctg) },
42 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
43 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
44 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
45 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
46 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
47 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
48 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
49 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
50 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
51 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
52 { "instruction_spx", VCPU_STAT(instruction_spx) },
53 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
54 { "instruction_stap", VCPU_STAT(instruction_stap) },
55 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
56 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
57 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
58 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
59 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
60 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
61 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
62 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
63 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
64 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
65 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
66 { "diagnose_44", VCPU_STAT(diagnose_44) },
71 /* Section: not file related */
/*
 * Architecture hooks required by common KVM code.  NOTE(review): the
 * function bodies (braces/returns) are not visible in this chunk --
 * they appear to have been lost in extraction.  Only behavior that is
 * visible is documented below.
 */
72 void kvm_arch_hardware_enable(void *garbage)
74 /* every s390 is virtualization enabled ;-) */
/* Counterpart of the enable hook; presumably also a no-op on s390. */
77 void kvm_arch_hardware_disable(void *garbage)
81 void decache_vcpus_on_cpu(int cpu)
85 int kvm_arch_hardware_setup(void)
90 void kvm_arch_hardware_unsetup(void)
94 void kvm_arch_check_processor_compat(void *rtn)
98 int kvm_arch_init(void *opaque)
103 void kvm_arch_exit(void)
107 /* Section: device related */
/*
 * Device-level ioctl handler.  The only ioctl visibly handled is
 * KVM_S390_ENABLE_SIE, which is forwarded to s390_enable_sie(); the
 * fallthrough return for other ioctls is not visible in this chunk.
 */
108 long kvm_arch_dev_ioctl(struct file *filp,
109 unsigned int ioctl, unsigned long arg)
111 if (ioctl == KVM_S390_ENABLE_SIE)
112 return s390_enable_sie();
/* Reports which KVM extensions are supported; body elided in this chunk. */
116 int kvm_dev_ioctl_check_extension(long ext)
121 /* Section: vm related */
123 * Get (and clear) the dirty memory log for a memory slot.
/* Dirty-log retrieval hook; body not visible in this chunk. */
125 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
126 struct kvm_dirty_log *log)
/*
 * VM-level ioctl dispatcher.  Visible case: KVM_S390_INTERRUPT copies a
 * struct kvm_s390_interrupt from userspace and injects it into the VM
 * via kvm_s390_inject_vm().  NOTE(review): the switch statement's
 * opening, the -EFAULT path for a failed copy_from_user, the default
 * case and the final return are not visible in this chunk.
 */
131 long kvm_arch_vm_ioctl(struct file *filp,
132 unsigned int ioctl, unsigned long arg)
134 struct kvm *kvm = filp->private_data;
135 void __user *argp = (void __user *)arg;
139 case KVM_S390_INTERRUPT: {
140 struct kvm_s390_interrupt s390int;
143 if (copy_from_user(&s390int, argp, sizeof(s390int)))
145 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and initialize the architecture-specific VM state:
 *  - enable the SIE facility for this process (s390_enable_sie),
 *  - allocate the zeroed struct kvm and the SCA (system control area)
 *    page referenced by the vcpus' SIE control blocks,
 *  - register an s390 debug-feature log named "kvm-<pid>" and attach
 *    the sprintf view to it,
 *  - initialize the floating-interrupt lock and list,
 *  - take a module reference.
 * NOTE(review): the error-unwinding labels between these lines are not
 * visible in this chunk (only the free_page of the SCA is); confirm the
 * cleanup order against the full file.
 */
155 struct kvm *kvm_arch_create_vm(void)
161 rc = s390_enable_sie();
166 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
170 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
174 sprintf(debug_name, "kvm-%u", current->pid);
176 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
180 spin_lock_init(&kvm->arch.float_int.lock);
181 INIT_LIST_HEAD(&kvm->arch.float_int.list);
183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
184 VM_EVENT(kvm, 3, "%s", "vm created");
186 try_module_get(THIS_MODULE);
190 free_page((unsigned long)(kvm->arch.sca));
/*
 * Tear down what kvm_arch_create_vm set up: unregister the debug log,
 * free the SCA page and drop the module reference.
 */
197 void kvm_arch_destroy_vm(struct kvm *kvm)
199 debug_unregister(kvm->arch.dbf);
200 free_page((unsigned long)(kvm->arch.sca));
202 module_put(THIS_MODULE);
205 /* Section: vcpu related */
206 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
211 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
213 /* kvm common code refers to this, but doesn't call it */
/*
 * Called when the vcpu is scheduled in on a host cpu: save the host's
 * floating point and access registers, sanitize the guest fpc with
 * FPC_VALID_MASK, then load the guest's fp/access registers.  If a
 * signal is already pending, request a stop intercept via
 * CPUSTAT_STOP_INT -- presumably so SIE exits promptly; confirm.
 */
217 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
219 save_fp_regs(&vcpu->arch.host_fpregs);
220 save_access_regs(vcpu->arch.host_acrs);
221 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
222 restore_fp_regs(&vcpu->arch.guest_fpregs);
223 restore_access_regs(vcpu->arch.guest_acrs);
225 if (signal_pending(current))
226 atomic_set_mask(CPUSTAT_STOP_INT,
227 &vcpu->arch.sie_block->cpuflags);
/*
 * Scheduled out: mirror of kvm_arch_vcpu_load -- stash the guest
 * fp/access registers and restore the host's.
 */
230 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
232 save_fp_regs(&vcpu->arch.guest_fpregs);
233 save_access_regs(vcpu->arch.guest_acrs);
234 restore_fp_regs(&vcpu->arch.host_fpregs);
235 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Perform the architected initial-CPU-reset state on this vcpu's SIE
 * control block: clear the PSW, prefix, cpu timer, clock comparator and
 * TOD programmable register, invalidate ihcpu, and reset all 16 control
 * registers before re-arming cr0/cr14 with their reset values (the
 * 0xE0 / 0xC2000000 constants -- presumably the architected interrupt
 * subclass masks; confirm against the Principles of Operation).
 * The inline lfpc also pushes the cleared fpc into the hardware fpc.
 * Per the original comment, this matches initial cpu reset except that
 * the cpu is left in z/Arch mode rather than switching to ESA.
 */
238 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
240 /* this equals initial cpu reset in pop, but we don't switch to ESA */
241 vcpu->arch.sie_block->gpsw.mask = 0UL;
242 vcpu->arch.sie_block->gpsw.addr = 0UL;
243 vcpu->arch.sie_block->prefix = 0UL;
244 vcpu->arch.sie_block->ihcpu = 0xffff;
245 vcpu->arch.sie_block->cputm = 0UL;
246 vcpu->arch.sie_block->ckc = 0UL;
247 vcpu->arch.sie_block->todpr = 0;
248 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
249 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
250 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
251 vcpu->arch.guest_fpregs.fpc = 0;
252 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
253 vcpu->arch.sie_block->gbea = 1;
/*
 * One-time setup of the vcpu's SIE control block: start in z/Arch mode
 * (CPUSTAT_ZARCH), set the guest memory limit/origin fields (gmslm,
 * gmsor), and program the ecb/eca control bytes (magic values -- their
 * bit meanings are not derivable from this chunk; confirm against the
 * SIE documentation).  Also arms the clock-comparator timer used to
 * wake an idle vcpu (kvm_s390_idle_wakeup) and fills in the cpu id,
 * overriding the version byte with 0xfe.
 */
256 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
258 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
259 vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
260 vcpu->arch.sie_block->gmsor = 0x000000000000;
261 vcpu->arch.sie_block->ecb = 2;
262 vcpu->arch.sie_block->eca = 0xC1002001U;
263 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
264 (unsigned long) vcpu);
265 get_cpu_id(&vcpu->arch.cpu_id);
266 vcpu->arch.cpu_id.version = 0xfe;
/*
 * Allocate a vcpu and wire it into the VM:
 *  - kzalloc the vcpu struct and get a zeroed page for its SIE block,
 *  - record the cpu address (icpua) and register the SIE block in the
 *    per-VM SCA slot for this id (BUG if the slot is taken),
 *  - point the SIE block back at the SCA (split high/low words),
 *  - initialize the local interrupt structure and publish it in the
 *    floating-interrupt array under the float_int lock,
 *  - run common kvm_vcpu_init and take a module reference.
 * NOTE(review): the second parameter line (the vcpu id) and the error
 * paths between these lines are not visible in this chunk.
 */
270 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
273 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
279 vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
281 if (!vcpu->arch.sie_block)
284 vcpu->arch.sie_block->icpua = id;
285 BUG_ON(!kvm->arch.sca);
286 BUG_ON(kvm->arch.sca->cpu[id].sda);
287 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
288 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
289 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
291 spin_lock_init(&vcpu->arch.local_int.lock);
292 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
293 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
294 spin_lock_bh(&kvm->arch.float_int.lock);
295 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
296 init_waitqueue_head(&vcpu->arch.local_int.wq);
297 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
298 spin_unlock_bh(&kvm->arch.float_int.lock);
300 rc = kvm_vcpu_init(vcpu, kvm, id);
303 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
304 vcpu->arch.sie_block);
306 try_module_get(THIS_MODULE);
/* Free the vcpu's SIE block page and drop the module reference. */
315 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
317 VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
318 free_page((unsigned long)(vcpu->arch.sie_block));
320 module_put(THIS_MODULE);
/* Stub required by common KVM code; body not visible in this chunk. */
323 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
325 /* kvm common code refers to this, but never calls it */
/* KVM_S390_INITIAL_RESET ioctl backend: delegates to the reset helper.
 * NOTE(review): surrounding vcpu_load/vcpu_put lines are presumably
 * elided in this chunk; confirm. */
330 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
333 kvm_s390_vcpu_initial_reset(vcpu);
338 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
341 memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs));
346 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
349 memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/*
 * Register accessors for the KVM_GET/SET_SREGS and KVM_GET/SET_FPU
 * ioctls: straight memcpy between the userspace structures and the
 * vcpu's access registers, SIE control registers, and guest fp state.
 * NOTE(review): the vcpu_load/vcpu_put brackets and "return 0" lines
 * are presumably elided from this chunk; confirm against the full file.
 */
354 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
355 struct kvm_sregs *sregs)
358 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
359 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
364 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
365 struct kvm_sregs *sregs)
368 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
369 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
374 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
377 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
378 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
383 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
386 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
387 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * Install an initial PSW into the SIE block.  Visibly refuses to do so
 * while the vcpu is running (CPUSTAT_RUNNING set) -- the rejected
 * return value and any locking around the check are not visible in
 * this chunk.
 */
392 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
397 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
400 vcpu->arch.sie_block->gpsw = psw;
/* The following four ioctl backends are unimplemented on s390 and
 * simply report -EINVAL to the caller. */
405 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
406 struct kvm_translation *tr)
408 return -EINVAL; /* not implemented yet */
411 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
412 struct kvm_debug_guest *dbg)
414 return -EINVAL; /* not implemented yet */
417 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
418 struct kvm_mp_state *mp_state)
420 return -EINVAL; /* not implemented yet */
423 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
424 struct kvm_mp_state *mp_state)
426 return -EINVAL; /* not implemented yet */
/*
 * Enter SIE once for this vcpu: copy guest gprs 14/15 (16 bytes) into
 * the SIE block's gg14 save slot, clear the previous intercept code,
 * run the guest via sie64a(), and copy gg14/gg15 back afterwards.  The
 * trace events log the cpuflags on entry and the intercept code on
 * exit.  NOTE(review): lines between the memcpy and the sie64a call
 * (presumably preemption/irq bookkeeping) are not visible in this
 * chunk.
 */
429 static void __vcpu_run(struct kvm_vcpu *vcpu)
431 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
436 vcpu->arch.sie_block->icptcode = 0;
440 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
441 atomic_read(&vcpu->arch.sie_block->cpuflags));
442 sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
443 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
444 vcpu->arch.sie_block->icptcode);
449 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * KVM_RUN backend: the main guest execution loop for one vcpu.
 *  - optionally install the vcpu's sigmask for the duration,
 *  - mark the vcpu CPUSTAT_RUNNING,
 *  - re-load the guest PSW from kvm_run when re-entering after a
 *    KVM_EXIT_S390_SIEIC userspace exit,
 *  - loop: deliver pending interrupts, run SIE (the __vcpu_run call
 *    between these lines is not visible in this chunk), then handle
 *    the intercept, until a signal arrives or the handler returns
 *    non-zero,
 *  - -ENOTSUPP from the handler means the intercept must be completed
 *    in userspace: export icptcode/psw/ipa/ipb in kvm_run,
 *  - -EREMOTE means the handler already prepared kvm_run.
 * NOTE(review): the return-value mapping at the end (including what is
 * returned after signal_pending) is not visible in this chunk.
 */
452 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
459 if (vcpu->sigset_active)
460 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
462 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
464 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
466 switch (kvm_run->exit_reason) {
467 case KVM_EXIT_S390_SIEIC:
468 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
469 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
471 case KVM_EXIT_UNKNOWN:
472 case KVM_EXIT_S390_RESET:
481 kvm_s390_deliver_pending_interrupts(vcpu);
483 rc = kvm_handle_sie_intercept(vcpu);
484 } while (!signal_pending(current) && !rc);
486 if (signal_pending(current) && !rc)
489 if (rc == -ENOTSUPP) {
490 /* intercept cannot be handled in-kernel, prepare kvm-run */
491 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
492 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
493 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
494 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
495 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
496 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
500 if (rc == -EREMOTE) {
501 /* intercept was handled, but userspace support is needed
502 * kvm_run has been prepared by the handler */
506 if (vcpu->sigset_active)
507 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
511 vcpu->stat.exit_userspace++;
/*
 * Copy n bytes from kernel memory into the guest at guestdest, either
 * honoring the guest's prefix translation (copy_to_guest) or using the
 * absolute address space (copy_to_guest_absolute).  NOTE(review): the
 * if/else on the prefix flag that selects between the two calls is not
 * visible in this chunk -- presumably "if (prefix)" chooses the first;
 * confirm.
 */
515 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
516 unsigned long n, int prefix)
519 return copy_to_guest(vcpu, guestdest, from, n);
521 return copy_to_guest_absolute(vcpu, guestdest, from, n);
525 * store status at address
526 * we use have two special cases:
527 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
528 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Implement the architected store-status operation: write the vcpu's
 * fp regs, gprs, PSW, prefix, fpc, TOD programmable register, cpu
 * timer, clock comparator, access regs and control regs into the
 * save area at 'addr' in guest memory, via __guestcopy.  The two
 * special address tokens first store the archmode byte at guest
 * address 163 and redirect addr to SAVE_AREA_BASE (absolute for
 * NOADDR, prefixed for PREFIXED).  NOTE(review): the setting of the
 * local 'prefix' flag and each copy's error return are not visible in
 * this chunk.
 */
530 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
532 const unsigned char archmode = 1;
535 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
536 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
538 addr = SAVE_AREA_BASE;
540 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
541 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
543 addr = SAVE_AREA_BASE;
548 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
549 vcpu->arch.guest_fpregs.fprs, 128, prefix))
552 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
553 vcpu->arch.guest_gprs, 128, prefix))
556 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
557 &vcpu->arch.sie_block->gpsw, 16, prefix))
560 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
561 &vcpu->arch.sie_block->prefix, 4, prefix))
564 if (__guestcopy(vcpu,
565 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
566 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
569 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
570 &vcpu->arch.sie_block->todpr, 4, prefix))
573 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
574 &vcpu->arch.sie_block->cputm, 8, prefix))
577 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
578 &vcpu->arch.sie_block->ckc, 8, prefix))
581 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
582 &vcpu->arch.guest_acrs, 64, prefix))
585 if (__guestcopy(vcpu,
586 addr + offsetof(struct save_area_s390x, ctrl_regs),
587 &vcpu->arch.sie_block->gcr, 128, prefix))
/* Thin wrapper around __kvm_s390_vcpu_store_status -- presumably adds
 * vcpu_load/vcpu_put bracketing (those lines are elided here); confirm. */
592 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
597 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * Per-vcpu ioctl dispatcher:
 *  - KVM_S390_INTERRUPT: copy a kvm_s390_interrupt from userspace and
 *    inject it into this vcpu,
 *  - KVM_S390_STORE_STATUS: store status at the address given in arg,
 *  - KVM_S390_SET_INITIAL_PSW: copy a psw_t from userspace and install
 *    it as the initial PSW,
 *  - KVM_S390_INITIAL_RESET: perform the initial cpu reset.
 * NOTE(review): the switch opening, -EFAULT paths for failed
 * copy_from_user, and the default case are not visible in this chunk.
 */
602 long kvm_arch_vcpu_ioctl(struct file *filp,
603 unsigned int ioctl, unsigned long arg)
605 struct kvm_vcpu *vcpu = filp->private_data;
606 void __user *argp = (void __user *)arg;
609 case KVM_S390_INTERRUPT: {
610 struct kvm_s390_interrupt s390int;
612 if (copy_from_user(&s390int, argp, sizeof(s390int)))
614 return kvm_s390_inject_vcpu(vcpu, &s390int);
616 case KVM_S390_STORE_STATUS:
617 return kvm_s390_vcpu_store_status(vcpu, arg);
618 case KVM_S390_SET_INITIAL_PSW: {
621 if (copy_from_user(&psw, argp, sizeof(psw)))
623 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
625 case KVM_S390_INITIAL_RESET:
626 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
633 /* Section: memory related */
/*
 * Install the (single) guest memory slot.  Visible sanity checks: the
 * slot must start at guest physical zero, and both the userspace
 * address and the size must be page aligned.  On success the slot's
 * origin and size are recorded in kvm->arch.  NOTE(review): the
 * rejected-return values and the slot-count check implied by the
 * comment ("exactly one memory slot") are not visible in this chunk.
 */
634 int kvm_arch_set_memory_region(struct kvm *kvm,
635 struct kvm_userspace_memory_region *mem,
636 struct kvm_memory_slot old,
639 /* A few sanity checks. We can have exactly one memory slot which has
640 to start at guest virtual zero and which has to be located at a
641 page boundary in userland and which has to end at a page boundary.
642 The memory in userland is ok to be fragmented into various different
643 vmas. It is okay to mmap() and munmap() stuff in this slot after
644 doing this call at any time */
649 if (mem->guest_phys_addr)
652 if (mem->userspace_addr & (PAGE_SIZE - 1))
655 if (mem->memory_size & (PAGE_SIZE - 1))
658 kvm->arch.guest_origin = mem->userspace_addr;
659 kvm->arch.guest_memsize = mem->memory_size;
661 /* FIXME: we do want to interrupt running CPUs and update their memory
662 configuration now to avoid race conditions. But hey, changing the
663 memory layout while virtual CPUs are running is usually bad
664 programming practice. */
/* s390 has no memory aliases; body (presumably returning gfn unchanged)
 * is not visible in this chunk. */
669 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/* Module entry point: hand control to the common KVM initializer,
 * sized for this architecture's vcpu structure. */
674 static int __init kvm_s390_init(void)
676 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
/* Module exit hook; body not visible in this chunk. */
679 static void __exit kvm_s390_exit(void)
684 module_init(kvm_s390_init);
685 module_exit(kvm_s390_exit);