2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/hrtimer.h>
19 #include <linux/init.h>
20 #include <linux/kvm.h>
21 #include <linux/kvm_host.h>
22 #include <linux/module.h>
23 #include <linux/slab.h>
24 #include <linux/timer.h>
25 #include <asm/lowcore.h>
26 #include <asm/pgtable.h>
/*
 * VCPU_STAT(x) expands to two comma-separated initializer values: the byte
 * offset of counter x inside the stat member of struct kvm_vcpu, followed
 * by KVM_STAT_VCPU.
 */
31 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Table of kvm_stats_debugfs_item entries pairing a statistics name with the
 * per-vcpu counter it refers to. The name normally equals the stat field
 * name; the few entries where they differ are marked inline.
 * NOTE(review): the terminating entry and the closing brace of the array are
 * not visible in this listing.
 */
33 struct kvm_stats_debugfs_item debugfs_entries[] = {
34 { "userspace_handled", VCPU_STAT(exit_userspace) }, /* name differs from field */
35 { "exit_null", VCPU_STAT(exit_null) },
36 { "exit_validity", VCPU_STAT(exit_validity) },
37 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
38 { "exit_external_request", VCPU_STAT(exit_external_request) },
39 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
40 { "exit_instruction", VCPU_STAT(exit_instruction) },
41 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
42 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, /* name differs from field */
43 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
44 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
45 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
46 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
47 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
48 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
49 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
50 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
51 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, /* name differs from field */
52 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
53 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
54 { "instruction_spx", VCPU_STAT(instruction_spx) },
55 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
56 { "instruction_stap", VCPU_STAT(instruction_stap) },
57 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
58 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
59 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
60 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
61 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
62 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
63 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
64 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
65 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, /* name differs from field */
66 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, /* name differs from field */
67 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
68 { "diagnose_44", VCPU_STAT(diagnose_44) },
73 /* Section: not file related */
74 void kvm_arch_hardware_enable(void *garbage)
76 /* every s390 is virtualization enabled ;-) */
79 void kvm_arch_hardware_disable(void *garbage)
83 int kvm_arch_hardware_setup(void)
88 void kvm_arch_hardware_unsetup(void)
92 void kvm_arch_check_processor_compat(void *rtn)
96 int kvm_arch_init(void *opaque)
101 void kvm_arch_exit(void)
105 /* Section: device related */
/*
 * Device-level ioctl handler.
 * For KVM_S390_ENABLE_SIE the result of s390_enable_sie() is returned
 * directly; filp and arg are not used on that path.
 * NOTE(review): the return value for any other ioctl number is on a line
 * that is not visible in this listing.
 */
106 long kvm_arch_dev_ioctl(struct file *filp,
107 unsigned int ioctl, unsigned long arg)
109 if (ioctl == KVM_S390_ENABLE_SIE)
110 return s390_enable_sie();
114 int kvm_dev_ioctl_check_extension(long ext)
122 /* Section: vm related */
124 * Get (and clear) the dirty memory log for a memory slot.
126 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
127 struct kvm_dirty_log *log)
/*
 * VM-level ioctl handler.
 * The struct kvm is taken from filp->private_data and arg is treated as a
 * user-space pointer.
 */
132 long kvm_arch_vm_ioctl(struct file *filp,
133 unsigned int ioctl, unsigned long arg)
135 struct kvm *kvm = filp->private_data;
136 void __user *argp = (void __user *)arg;
/*
 * KVM_S390_INTERRUPT: copy a struct kvm_s390_interrupt in from user space
 * and pass it to kvm_s390_inject_vm(); the result is stored in r.
 * NOTE(review): the statement executed when copy_from_user() fails is not
 * visible in this listing.
 */
140 case KVM_S390_INTERRUPT: {
141 struct kvm_s390_interrupt s390int;
144 if (copy_from_user(&s390int, argp, sizeof(s390int)))
146 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and set up a new struct kvm.
 * Steps visible in this listing, in order:
 *  - call s390_enable_sie() and keep its result in rc
 *  - allocate a zeroed struct kvm
 *  - allocate one zeroed page and use it as the SCA block (kvm->arch.sca)
 *  - register a debug area named "kvm-<pid>" using current->pid
 *  - initialise the floating interrupt lock and list
 *  - attach debug_sprintf_view to the debug area and log "vm created"
 * NOTE(review): the checks after each step, the return statements and most
 * of the unwind path are on lines missing from this listing.
 */
156 struct kvm *kvm_arch_create_vm(void)
162 rc = s390_enable_sie();
167 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
171 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
175 sprintf(debug_name, "kvm-%u", current->pid);
177 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
181 spin_lock_init(&kvm->arch.float_int.lock);
182 INIT_LIST_HEAD(&kvm->arch.float_int.list);
184 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
185 VM_EVENT(kvm, 3, "%s", "vm created");
/* presumably part of the error unwind (releases the SCA page) - TODO confirm */
189 free_page((unsigned long)(kvm->arch.sca));
/*
 * Tear down one vcpu: log the event, detach its SIE block from the SCA,
 * free the SIE block page and call kvm_vcpu_uninit().
 */
196 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
198 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
/* only clear the SCA entry if it still points at this vcpu's SIE block */
199 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
200 (__u64) vcpu->arch.sie_block)
201 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
203 free_page((unsigned long)(vcpu->arch.sie_block));
204 kvm_vcpu_uninit(vcpu);
/*
 * Walk all KVM_MAX_VCPUS slots of kvm->vcpus, destroy the vcpu stored there
 * and reset the slot to NULL.
 * NOTE(review): line 213 is missing from this listing; presumably it guards
 * the destroy call against empty slots - confirm.
 */
208 static void kvm_free_vcpus(struct kvm *kvm)
212 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
214 kvm_arch_vcpu_destroy(kvm->vcpus[i]);
215 kvm->vcpus[i] = NULL;
220 void kvm_arch_sync_events(struct kvm *kvm)
/*
 * Release the resources of a VM. Visible here: guest physical memory is
 * released through kvm_free_physmem(), the SCA page is freed and the debug
 * area is unregistered.
 * NOTE(review): further statements may sit on the lines missing from this
 * listing.
 */
224 void kvm_arch_destroy_vm(struct kvm *kvm)
227 kvm_free_physmem(kvm);
228 free_page((unsigned long)(kvm->arch.sca));
229 debug_unregister(kvm->arch.dbf);
233 /* Section: vcpu related */
234 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
239 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
/*
 * Switch floating point and access registers from host to guest values:
 * the current contents are saved into host_fpregs/host_acrs, then the
 * guest copies are restored. The cpu argument is not used by the visible
 * statements.
 */
244 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
246 save_fp_regs(&vcpu->arch.host_fpregs);
247 save_access_regs(vcpu->arch.host_acrs);
/* drop any fpc bits outside FPC_VALID_MASK before restoring the guest fp state */
248 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
249 restore_fp_regs(&vcpu->arch.guest_fpregs);
250 restore_access_regs(vcpu->arch.guest_acrs);
/*
 * Counterpart of kvm_arch_vcpu_load(): save the floating point and access
 * registers into the guest copies, then restore the host copies.
 */
253 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
255 save_fp_regs(&vcpu->arch.guest_fpregs);
256 save_access_regs(vcpu->arch.guest_acrs);
257 restore_fp_regs(&vcpu->arch.host_fpregs);
258 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Put the vcpu's SIE block and floating point control into their initial
 * reset state: PSW, prefix, CPU timer, clock comparator and TOD programmable
 * register are zeroed, ihcpu is set to 0xffff, the control registers are
 * zeroed except CR0 and CR14, fpc is zeroed and gbea is set to 1.
 */
261 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
263 /* this equals initial cpu reset in pop, but we don't switch to ESA */
264 vcpu->arch.sie_block->gpsw.mask = 0UL;
265 vcpu->arch.sie_block->gpsw.addr = 0UL;
266 vcpu->arch.sie_block->prefix = 0UL;
267 vcpu->arch.sie_block->ihcpu = 0xffff;
268 vcpu->arch.sie_block->cputm = 0UL;
269 vcpu->arch.sie_block->ckc = 0UL;
270 vcpu->arch.sie_block->todpr = 0;
/* clear all 16 guest control registers, then set the non-zero reset values */
271 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
272 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
273 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
274 vcpu->arch.guest_fpregs.fpc = 0;
/* execute lfpc with the just-zeroed guest fpc as its operand */
275 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
276 vcpu->arch.sie_block->gbea = 1;
279 /* The current code can have up to 256 pages for virtio */
280 #define VIRTIODESCSPACE (256ul * 4096ul)
/*
 * Initialise a vcpu's SIE block and per-vcpu helpers.
 * NOTE(review): the return statement is not visible in this listing.
 */
282 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
284 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
/*
 * Guest storage window: gmsor is the origin, gmslm the last byte, which
 * lies VIRTIODESCSPACE bytes beyond the end of guest memory.
 */
285 vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
286 vcpu->kvm->arch.guest_origin +
287 VIRTIODESCSPACE - 1ul;
288 vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
289 vcpu->arch.sie_block->ecb = 2;
290 vcpu->arch.sie_block->eca = 0xC1002001U;
/* ckc_timer: absolute CLOCK_REALTIME hrtimer; its callback is set below */
291 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
/* tasklet runs kvm_s390_tasklet with the vcpu pointer as its argument */
292 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
293 (unsigned long) vcpu);
294 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
/* fill cpu_id through get_cpu_id(), then force the version field to 0xff */
295 get_cpu_id(&vcpu->arch.cpu_id);
296 vcpu->arch.cpu_id.version = 0xff;
/*
 * Allocate a vcpu with the given id for kvm and wire it into the VM's SCA
 * and floating interrupt structures.
 * NOTE(review): the remaining parameter line, the declaration of rc, the
 * error checks and the return paths are on lines missing from this listing.
 */
300 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
303 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
/* the SIE control block occupies one zeroed page */
309 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
310 get_zeroed_page(GFP_KERNEL);
312 if (!vcpu->arch.sie_block)
315 vcpu->arch.sie_block->icpua = id;
316 BUG_ON(!kvm->arch.sca);
/* publish the SIE block in the SCA entry for this id if the entry is unused */
317 if (!kvm->arch.sca->cpu[id].sda)
318 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
/*
 * NOTE(review): line 319 is missing from this listing; confirm whether the
 * BUG_ON below is the else-branch of the check above.
 */
320 BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
/* SCA address is stored split into its upper and lower 32 bits */
321 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
322 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
324 spin_lock_init(&vcpu->arch.local_int.lock);
325 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
326 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
/* register this vcpu's local interrupt state while holding the float_int lock */
327 spin_lock(&kvm->arch.float_int.lock);
328 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
329 init_waitqueue_head(&vcpu->arch.local_int.wq);
330 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
331 spin_unlock(&kvm->arch.float_int.lock);
333 rc = kvm_vcpu_init(vcpu, kvm, id);
336 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
337 vcpu->arch.sie_block);
346 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
348 /* kvm common code refers to this, but never calls it */
/*
 * ioctl-level wrapper that performs kvm_s390_vcpu_initial_reset() on vcpu.
 * NOTE(review): the surrounding statements and the return value are not
 * visible in this listing.
 */
353 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
356 kvm_s390_vcpu_initial_reset(vcpu);
/*
 * Copy the general purpose registers supplied in regs into the vcpu's
 * guest_gprs; sizeof(regs->gprs) bytes are copied.
 * NOTE(review): the statements around the memcpy and the return value are
 * on lines missing from this listing.
 */
361 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
364 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * Copy the vcpu's guest_gprs into regs->gprs; sizeof(regs->gprs) bytes are
 * copied.
 * NOTE(review): the statements around the memcpy and the return value are
 * on lines missing from this listing.
 */
369 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
372 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/*
 * Load the special registers from sregs: access registers go to
 * vcpu->arch.guest_acrs, control registers go to the SIE block's gcr array.
 */
377 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
378 struct kvm_sregs *sregs)
381 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
382 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Fill sregs from the vcpu: access registers come from
 * vcpu->arch.guest_acrs, control registers from the SIE block's gcr array.
 */
387 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
388 struct kvm_sregs *sregs)
391 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
392 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Load the guest floating point state from fpu: the fprs array and the fpc
 * value are copied into vcpu->arch.guest_fpregs. The fpc value is stored
 * as given, without masking, by the visible statements.
 */
397 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
400 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
401 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/*
 * Fill fpu with the guest floating point state: the fprs array and the fpc
 * value are copied out of vcpu->arch.guest_fpregs.
 */
406 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
409 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
410 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * Store psw as the guest PSW in the SIE block.
 * The CPUSTAT_RUNNING bit in cpuflags is tested first.
 * NOTE(review): the statement executed when that bit is set, and the return
 * value, are on lines missing from this listing.
 */
415 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
420 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
423 vcpu->arch.sie_block->gpsw = psw;
/* Address translation ioctl: unsupported here, always returns -EINVAL. */
428 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
429 struct kvm_translation *tr)
431 return -EINVAL; /* not implemented yet */
/* Guest debug ioctl: unsupported here, always returns -EINVAL. */
434 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
435 struct kvm_guest_debug *dbg)
437 return -EINVAL; /* not implemented yet */
/* Read-mp_state ioctl: unsupported here, always returns -EINVAL. */
440 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
441 struct kvm_mp_state *mp_state)
443 return -EINVAL; /* not implemented yet */
/* Write-mp_state ioctl: unsupported here, always returns -EINVAL. */
446 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
447 struct kvm_mp_state *mp_state)
449 return -EINVAL; /* not implemented yet */
/*
 * Enter the guest once through sie64a() and record the outcome.
 * NOTE(review): several statements of this function are on lines missing
 * from this listing.
 */
452 static void __vcpu_run(struct kvm_vcpu *vcpu)
/* copy 16 bytes starting at guest_gprs[14] (two registers) into the SIE block */
454 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
/* NOTE(review): the action taken for a pending machine check is not visible here */
459 if (test_thread_flag(TIF_MCCK_PENDING))
462 kvm_s390_deliver_pending_interrupts(vcpu);
/* clear any previous intercept code before entering the guest */
464 vcpu->arch.sie_block->icptcode = 0;
468 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
469 atomic_read(&vcpu->arch.sie_block->cpuflags));
/* a non-zero return from sie64a() is logged and turned into an addressing program interrupt */
470 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
471 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
472 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
474 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
475 vcpu->arch.sie_block->icptcode);
/* copy the same 16 bytes back from the SIE block into guest_gprs[14] onwards */
480 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * Run the vcpu until a signal is pending or intercept handling returns a
 * non-zero code, then report the exit to user space through kvm_run.
 * NOTE(review): declarations, the start of the run loop, several return
 * statements and closing braces are on lines missing from this listing.
 */
483 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
490 /* verify that memory has been registered */
491 if (!vcpu->kvm->arch.guest_memsize) {
/* install the vcpu's signal mask for the duration of the run; restored below */
496 if (vcpu->sigset_active)
497 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
499 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
501 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
/*
 * Resume according to the previous exit reason: after a SIEIC exit the
 * guest PSW is reloaded from the mask/addr values found in kvm_run.
 */
503 switch (kvm_run->exit_reason) {
504 case KVM_EXIT_S390_SIEIC:
505 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
506 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
508 case KVM_EXIT_UNKNOWN:
509 case KVM_EXIT_S390_RESET:
/* loop tail: handle the intercept; repeat while no signal is pending and rc is 0 */
519 rc = kvm_handle_sie_intercept(vcpu);
520 } while (!signal_pending(current) && !rc);
/* NOTE(review): the statement taken when the loop ended because of a signal is not visible */
522 if (signal_pending(current) && !rc)
525 if (rc == -ENOTSUPP) {
526 /* intercept cannot be handled in-kernel, prepare kvm-run */
527 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
528 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
529 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
530 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
531 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
532 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
536 if (rc == -EREMOTE) {
537 /* intercept was handled, but userspace support is needed
538 * kvm_run has been prepared by the handler */
/* put back the signal mask that was saved before the run */
542 if (vcpu->sigset_active)
543 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
/* count this return to user space */
547 vcpu->stat.exit_userspace++;
/*
 * Copy n bytes from the kernel buffer 'from' to guest address guestdest,
 * using either copy_to_guest() or copy_to_guest_absolute().
 * NOTE(review): the condition that selects between the two calls is on a
 * line missing from this listing; presumably a non-zero 'prefix' selects
 * copy_to_guest() - confirm.
 */
551 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
552 unsigned long n, int prefix)
555 return copy_to_guest(vcpu, guestdest, from, n);
557 return copy_to_guest_absolute(vcpu, guestdest, from, n);
561 * store status at address
562 * we have two special cases:
563 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
564 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the vcpu's register state into a save area in guest memory.
 * For both special addr values a one-byte architecture mode flag (value 1)
 * is first written to guest address 163 and addr is replaced by
 * SAVE_AREA_BASE. Each register set is then copied to its offset within
 * struct save_area_s390x through __guestcopy().
 * NOTE(review): the declaration and assignment of 'prefix' and the return
 * statements after each failing copy are on lines missing from this listing.
 */
566 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
568 const unsigned char archmode = 1;
/* NOADDR: the mode byte is written with copy_to_guest_absolute() */
571 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
572 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
574 addr = SAVE_AREA_BASE;
/* PREFIXED: the mode byte is written with copy_to_guest() */
576 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
577 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
579 addr = SAVE_AREA_BASE;
/* floating point registers, 128 bytes */
584 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
585 vcpu->arch.guest_fpregs.fprs, 128, prefix))
/* general purpose registers, 128 bytes */
588 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
589 vcpu->arch.guest_gprs, 128, prefix))
/* guest PSW, 16 bytes */
592 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
593 &vcpu->arch.sie_block->gpsw, 16, prefix))
/* prefix register, 4 bytes */
596 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
597 &vcpu->arch.sie_block->prefix, 4, prefix))
/* floating point control register, 4 bytes */
600 if (__guestcopy(vcpu,
601 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
602 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
/* todpr field of the SIE block, 4 bytes */
605 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
606 &vcpu->arch.sie_block->todpr, 4, prefix))
/* cputm field of the SIE block, 8 bytes */
609 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
610 &vcpu->arch.sie_block->cputm, 8, prefix))
/* ckc field of the SIE block, 8 bytes */
613 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
614 &vcpu->arch.sie_block->ckc, 8, prefix))
/* access registers, 64 bytes */
617 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
618 &vcpu->arch.guest_acrs, 64, prefix))
/* control registers, 128 bytes */
621 if (__guestcopy(vcpu,
622 addr + offsetof(struct save_area_s390x, ctrl_regs),
623 &vcpu->arch.sie_block->gcr, 128, prefix))
/*
 * Wrapper around __kvm_s390_vcpu_store_status(); its result is kept in rc.
 * NOTE(review): the statements surrounding the call and the return are on
 * lines missing from this listing.
 */
628 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
633 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * vcpu-level ioctl dispatcher.
 * The vcpu is taken from filp->private_data; arg is used either as a plain
 * value or, through argp, as a user-space pointer depending on the ioctl.
 * NOTE(review): the switch statement itself, the failure paths of
 * copy_from_user() and the default case are on lines missing from this
 * listing.
 */
638 long kvm_arch_vcpu_ioctl(struct file *filp,
639 unsigned int ioctl, unsigned long arg)
641 struct kvm_vcpu *vcpu = filp->private_data;
642 void __user *argp = (void __user *)arg;
/* inject an interrupt described by a user-space struct kvm_s390_interrupt */
645 case KVM_S390_INTERRUPT: {
646 struct kvm_s390_interrupt s390int;
648 if (copy_from_user(&s390int, argp, sizeof(s390int)))
650 return kvm_s390_inject_vcpu(vcpu, &s390int);
/* arg is passed on unchanged as the store-status address */
652 case KVM_S390_STORE_STATUS:
653 return kvm_s390_vcpu_store_status(vcpu, arg);
/* the new PSW is copied in from user space and passed by value */
654 case KVM_S390_SET_INITIAL_PSW: {
657 if (copy_from_user(&psw, argp, sizeof(psw)))
659 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
661 case KVM_S390_INITIAL_RESET:
662 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
669 /* Section: memory related */
/*
 * Register the guest memory slot described by mem and propagate the new
 * origin and size into the SIE block of every vcpu.
 * NOTE(review): the last parameter line, the return statements behind each
 * sanity check, the per-slot checks inside both loops and the labels of the
 * exit paths are on lines missing from this listing.
 */
670 int kvm_arch_set_memory_region(struct kvm *kvm,
671 struct kvm_userspace_memory_region *mem,
672 struct kvm_memory_slot old,
677 /* A few sanity checks. We can have exactly one memory slot which has
678 to start at guest virtual zero and which has to be located at a
679 page boundary in userland and which has to end at a page boundary.
680 The memory in userland is ok to be fragmented into various different
681 vmas. It is okay to mmap() and munmap() stuff in this slot after
682 doing this call at any time */
/* only slot 0 is accepted, and only while no guest memory is registered yet */
684 if (mem->slot || kvm->arch.guest_memsize)
/* the slot must start at guest physical address 0 */
687 if (mem->guest_phys_addr)
/* the user-space address must be page aligned */
690 if (mem->userspace_addr & (PAGE_SIZE - 1))
/* the size must be a multiple of the page size */
693 if (mem->memory_size & (PAGE_SIZE - 1))
/* try to take every vcpu's mutex without blocking */
700 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
703 if (!mutex_trylock(&kvm->vcpus[i]->mutex))
707 kvm->arch.guest_origin = mem->userspace_addr;
708 kvm->arch.guest_memsize = mem->memory_size;
710 /* update sie control blocks, and unlock all vcpus */
711 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
713 kvm->vcpus[i]->arch.sie_block->gmsor =
714 kvm->arch.guest_origin;
715 kvm->vcpus[i]->arch.sie_block->gmslm =
716 kvm->arch.guest_memsize +
717 kvm->arch.guest_origin +
718 VIRTIODESCSPACE - 1ul;
719 mutex_unlock(&kvm->vcpus[i]->mutex);
/* presumably the unwind after a failed trylock (drops mutexes already taken) - TODO confirm */
727 mutex_unlock(&kvm->vcpus[i]->mutex);
731 void kvm_arch_flush_shadow(struct kvm *kvm)
735 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/*
 * Module init: hand over to kvm_init() with no opaque argument, the size of
 * struct kvm_vcpu and this module, and return its result.
 */
740 static int __init kvm_s390_init(void)
742 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
745 static void __exit kvm_s390_exit(void)
/* register kvm_s390_init/kvm_s390_exit as the module's entry and exit points */
750 module_init(kvm_s390_init);
751 module_exit(kvm_s390_exit);